In [57]:
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import pandas
from Code.PK_model import PK_result


image_file = "../Images/data_explore/"

# Pharmacokinetics and Pharmacodynamics of Myelotoxicity

### Contents
- [Introduction](introduction.ipynb)
- **Exploring Data**
    - [Overview](#Overview)
    - [PK Data](#PK-Data)
    - [PD Data](#PD-Data)
- [PK model - Naive Pooled Inference](PK_naive_pooled_inference.ipynb)
- [PD friberg model - Naive Pooled Inference](PD_naive_pooled_inference.ipynb)

## The data

As discussed in section [Forward Model](forward_model.ipynb), many decisions made in mathematical modelling come from the observations made. Also, as will be seen in the [parameter inference] and [model selection] sections, observations are required to analyse and refine the models in order to produce reliable predictions. In this report, I will be using both simulated data, to test and refine the methods used throughout, and experimental data provided by my colleague at Roche.

### Experimental data
The experimental data used in this report is from a preclinical study on rats. The study involves dosing rats with different anti-tumour drugs at differing dose levels. Observations on drug concentrations in the blood stream and nine different blood cell counts are provided in the data.

In [2]:
# Load the raw study data.
df = pandas.read_csv("../0470-2008_2018-05-09.csv")

# Names of the administered drugs; the control group is not a drug.
drug_names = df.DRUG.unique()
drug_names = drug_names[drug_names != 'Controls']

# Sort once so that every later selection is ordered by dose and then time.
df = df.sort_values(['DOSE', 'TIME'], ascending=True)

# Overview table: one row per drug, one column per distinct dose level.
group = df.groupby('DRUG')
df_view = group['DOSE'].unique().apply(pandas.Series)
df_view = df_view.fillna('')  # drugs with fewer dose levels get blank cells
# Name the columns from the actual width instead of assuming exactly three
# dose levels, so the cell keeps working if the data set changes.
df_view.columns = ["Dose " + str(level + 1) for level in range(df_view.shape[1])]
print(df_view)

             Dose 1 Dose 2 Dose 3
DRUG                             
Controls        0.0              
Docetaxel       5.0   10.0   15.0
Irinotecan     34.0   68.0       
Topotecan       7.5   15.0       
Vinflunine     10.0   20.0       
Vinorelbine     5.0   10.0   20.0


In [3]:
# Print every kind of measurement present in the data. Drug concentrations
# are labelled with the name of the drug itself, so they are summarised on a
# single line and the remaining labels are listed one per line.
observation_names = df["YNAME"].unique()
non_drug_labels = [label for label in observation_names if label not in drug_names]

print("Observations in dataset:\n")
print("Drug concentration (labelled as one of " + str(drug_names) +")")
for observation in non_drug_labels:
    print(observation)

Observations in dataset:

Drug concentration (labelled as one of ['Vinorelbine' 'Docetaxel' 'Irinotecan' 'Topotecan' 'Vinflunine'])
Red blood cells
Hemoglobin
Platelets 
White blood cells
Neutrophiles absolute
Lymphocytes absolute
Monocytes absolute
Eosinophils absolute
Basophils absolute


In this report I consider the drug Docetaxel and the concentration of platelets in the blood. The PK and platelet data gathered for Docetaxel can be seen in Figure \ref{fig:DocetaxelPlateletData}.

<!-- I will take a naive-pooled approach
           % define naive-pooled
in this report when performing parameter inference and further analyses by assuming each individual can be ascribed the same set of parameters. -->

In [32]:
# Select drug(s):
drugs = ['Docetaxel']
# Select observation(s):
observations = ["Platelets "]  # the trailing space is part of the YNAME label in the data
dose_unit = "mg/kg"


def _mean_and_std(df_obs):
    """Summarise OBS per (TIME, DOSE) group.

    Groups that contain a single measurement are discarded before
    aggregating. Returns a frame with the columns TIME, DOSE, OBS (the group
    mean) and std (the group standard deviation).
    """
    keys = ["TIME", "DOSE"]
    repeated = (
        df_obs[['TIME', 'DOSE', 'OBS']]
        .groupby(keys, as_index=False)
        .filter(lambda x: len(x) > 1)
    )
    stats = repeated.groupby(keys, as_index=False).agg({'OBS': ['mean', 'std']})
    stats.columns = ['TIME', "DOSE", 'OBS', 'std']
    return stats


def _mean_dose_amount(df_all, drug, dose):
    """Mean of the AMT column over the rows of `drug` at dose level `dose`.

    Rows whose AMT is the placeholder '.' are ignored. Dose level 0 maps to an
    amount of 0 without consulting the data.
    """
    if dose == 0:
        return 0
    df_dosed = df_all.loc[(df_all['DRUG'] == drug) & (df_all['DOSE'] == dose)]
    # Average only the AMT column: a frame-wide mean() also visits the string
    # columns, which raises a TypeError in pandas >= 2.
    return df_dosed.loc[df_dosed['AMT'] != '.', 'AMT'].astype('float64').mean()


# Refine data
data_sets = {}
units = {}
for drug in drugs:
    # Rows of the selected drug plus the controls, with missing observations
    # (marked '.') removed so that OBS can be converted to a number.
    df_drug = df.loc[(df['DRUG'] == drug) | (df['DRUG'] == 'Controls')]
    df_drug = df_drug.loc[df_drug['OBS'] != '.']
    df_drug = df_drug.astype({'OBS': 'float64', 'DOSE': 'float64'})

    # Drug concentration (PK) measurements are labelled with the drug's name.
    # The mask is built from df_drug itself rather than from df, so it always
    # has the same index as the frame it filters.
    df_PK = df_drug.loc[df_drug['YNAME'] == drug]
    data_sets[drug+"_"+drug] = df_PK
    units[drug] = df_PK['UNIT'].values[0]

    # The PK summary does not depend on the observation, so build it once.
    df_PK_stats = _mean_and_std(df_PK)
    df_PK_save = df_PK_stats.copy()
    df_PK_save['AMT'] = [None]*len(df_PK_save)

    for observation_name in observations:
        df_PD = df_drug.loc[df_drug['YNAME'] == observation_name]
        data_sets[drug+"_"+observation_name] = df_PD
        units[observation_name] = df_PD['UNIT'].values[0]

        df_PD_stats = _mean_and_std(df_PD)
        data_sets[drug+"_"+observation_name+"_stats"] = df_PD_stats

        # Attach the administered amount to the per-dose summaries.
        for dose in df_PD_stats.DOSE.unique():
            dose_amount = _mean_dose_amount(df, drug, dose)
            df_PD_save = df_PD_stats[df_PD_stats.DOSE == dose].copy()
            df_PD_save["dose_amount"] = dose_amount
            df_PK_save.loc[df_PK_save["DOSE"] == dose, 'AMT'] = dose_amount

            # Uncomment to save the refined PD data for later use:
#             df_PD_save.to_csv(path_or_buf="../Data_and_parameters/pd_real_data_refined_"+drug+"_dose_"+str(dose))
    # Uncomment to save the refined PK data for later use:
#     df_PK_save.to_csv(path_or_buf="../Data_and_parameters/pk_real_data_refined_"+drug)

# Give the dose column a label that includes its unit; the plots use it as
# the legend title.
data_sets = {
    name: data.rename(columns={'DOSE': 'Dose, '+dose_unit}, errors="raise")
    for name, data in data_sets.items()
}

In [34]:
# Scatter plot of each selected observation against time, coloured by dose
# level and with the marker size taken from the WT column.
x_label = "Time, hours"
for drug in drugs:
    for name in observations:
        y_label = name + ", " + units[name]
        # Look the data up with this loop's own variable. The previous
        # version used `observation_name`, a name left over from another
        # cell, so it only worked by accident of execution order.
        df_plot = data_sets[drug+"_"+name]

        # Drug concentration uses a tick every hour; every other observation
        # uses a tick every 48 hours.
        tick_distance = 1 if name == drug else 48

        fig = px.scatter(
            df_plot,
            title=drug + " Data: " + name,
            x="TIME",
            y="OBS",
            color='Dose, '+dose_unit,
            size="WT",
            size_max=10,
            width=800,
            height=500,
            hover_data=list(df_plot.columns))
        fig.update_xaxes(title_text=x_label, tick0=0, dtick=tick_distance)
        fig.update_yaxes(title_text=y_label)
        fig.show()

Let's look at the data for determining the pharmacokinetic behaviour to see what it can tell us.

In [43]:
# One concentration-time trace per individual (ID) on a logarithmic y axis,
# coloured by dose level. Each figure is also written to disk as an SVG.
dose_column = 'Dose, '+dose_unit
for drug in drugs:
    x_label = "Time, hours"
    y_label = drug + " concentration, " + units[drug]
    df_plot = data_sets[drug+"_"+drug]

    line_options = dict(
        x="TIME",
        y="OBS",
        line_group="ID",
        color=dose_column,
        width=500,
        height=360,
        # start at the second palette entry, skipping the first colour
        color_discrete_sequence=px.colors.qualitative.Plotly[1:],
        hover_data=list(df_plot.columns),
    )
    fig = px.line(df_plot, **line_options)

    # Every update_* call returns the figure itself, so they can be chained.
    (
        fig.update_xaxes(title_text=x_label)
        .update_yaxes(title_text=y_label)
        .update_layout(yaxis_type="log")
        .update_traces(mode='markers+lines')
    )

    fig.write_image(image_file + drug + "_PK_traces.svg")
    fig.show()

We can use these curves to determine which PK model to use (see the section [forward model](forward_model.ipynb)). If the dose-normalised curves line up with each other and do not diverge, then that implies the rates in the model are linear. Otherwise, one or more of the transfer rates or the clearance is non-linear. Irinotecan is the only drug in the study that definitely does not follow linear dynamics. The data for docetaxel does not fit on a straight line, but it is also not fine-grained enough to clearly distinguish between the shapes of 2, 3 or more compartment models. So, to keep the parameter to data point ratio low, this drug would be best modelled by a two-compartment model.

We can also look at the observations other than drug concentration to get a view on the effect of the drug

In [45]:
# For every drug/observation pair draw two figures and save both as SVG:
#   1. the individual traces, one line per ID, coloured by dose level;
#   2. the per-dose mean with standard-deviation error bars, one facet per
#      dose level.
dose_column = 'Dose, '+dose_unit
tick_distance = 48
for drug in drugs:
    for observation_name in observations:
        x_label = "Time, hours"
        y_label = observation_name + ", " + units[observation_name]
        mean_label = 'Mean ' + y_label
        file_stem = image_file + drug +"_" + observation_name

        # Figure 1: individual traces.
        df_plot = data_sets[drug+"_"+observation_name]
        fig = px.line(
            df_plot,
            x="TIME",
            y="OBS",
            line_group="ID",
            color=dose_column,
            width=500,
            height=360,
            hover_data=list(df_plot.columns),
        )
        (
            fig.update_xaxes(title_text=x_label, tick0=0, dtick=tick_distance)
            .update_yaxes(title_text=y_label)
            .update_traces(mode='markers+lines', opacity=1)
        )
        fig.write_image(file_stem + "_traces.svg")
        fig.show()

        # Figure 2: mean and standard deviation per dose level. The dose
        # column is converted to strings before it is used for colour and
        # facets.
        df_plot = (
            data_sets[drug+"_"+observation_name+"_stats"]
            .rename(columns={'TIME': x_label, 'OBS': mean_label}, errors="raise")
            .astype({dose_column: 'str'})
        )
        fig = px.scatter(
            df_plot,
            x=x_label,
            y=mean_label,
            error_y="std",
            facet_col=dose_column,
            color=dose_column,
            width=900,
            height=360,
        )
        (
            fig.update_xaxes(tick0=0, dtick=2*tick_distance)
            .update_traces(mode='markers+lines')
            .update_layout(showlegend=False)
        )
        fig.write_image(file_stem + "_mean.svg")
        fig.show()

This shows there is a lot of variability between individuals. This can be a little messy to see fully so let's only look at a subset of the individuals.

In [9]:
# Pick random individuals to graph, a fixed number from every test group.

# Relevant groups and their IDs
ids = {
    'Test_Group_1_ids' : np.arange(5061, 5073), # Control Ids 5061-5072
    'Test_Group_2_ids' : np.arange(5073, 5085), # Vinorelbine 5 Ids 5073-5084
    'Test_Group_3_ids' : np.arange(5085, 5097), # Vinorelbine 10 5085-5096
    'Test_Group_4_ids' : np.arange(5121, 5133), # Docetaxel 5 5121-5132
    'Test_Group_5_ids' : np.arange(5133, 5145), # Docetaxel 10 5133-5144
    'Test_Group_6_ids' : np.arange(5145, 5157), # Irinotecan 34 5145-5156
    # NOTE(review): this yields the ten IDs 5158-5167, while the original
    # comment claimed the range ends at 5168 and every other group has twelve
    # IDs - confirm the intended IDs against the study records.
    'Test_Group_7_ids' : np.concatenate((np.array([5158,5159]), np.arange(5160, 5168))), # Irinotecan 68 5158-5167
    'Test_Group_1b_ids' : np.arange(5097, 5109), # Vinorelbine 20 5097-5108
    'Test_Group_2b_ids' : np.arange(5109, 5121), # Docetaxel 15 Ids 5109-5120
    'Test_Group_3b_ids' : np.arange(5188, 5200), # Vinoflunine 10 5188-5199
    'Test_Group_4b_ids' : np.arange(5200, 5212), # Vinoflunine 20 5200-5211
    'Test_Group_9b_ids' : np.arange(5176, 5188), # Topotecan 7.5 5176-5187
    'Test_Group_10b_ids' : np.arange(5794, 5806), # Topotecan 15 5794-5805
}

# Selecting randomly from each group. The generator is seeded so that a
# re-run of the notebook selects the same individuals.
SAMPLE_SEED = 0
num_to_select = 1
sample_rng = np.random.default_rng(SAMPLE_SEED)

samples_per_group = []
for group_name, group_ids in ids.items():
    sample_from_group = sample_rng.choice(group_ids, size=num_to_select, replace=False)
    samples_per_group.append(sample_from_group)
    print(group_name +": "+str(sample_from_group))

# Concatenating the integer arrays directly keeps the IDs as integers.
samples = np.concatenate(samples_per_group)

Test_Group_1_ids: [5066]
Test_Group_2_ids: [5084]
Test_Group_3_ids: [5091]
Test_Group_4_ids: [5132]
Test_Group_5_ids: [5139]
Test_Group_6_ids: [5145]
Test_Group_7_ids: [5158]
Test_Group_1b_ids: [5102]
Test_Group_2b_ids: [5120]
Test_Group_3b_ids: [5198]
Test_Group_4b_ids: [5201]
Test_Group_9b_ids: [5178]
Test_Group_10b_ids: [5800]


In [10]:
# Plot the traces of the randomly selected individuals only.
for drug in drugs:
    for observation_name in observations:
        # Rows of this observation for the selected drug and the controls,
        # without the missing observations (marked '.').
        is_drug_or_control = (df['DRUG'] == drug) | (df['DRUG'] == 'Controls')
        df_drug = df.loc[is_drug_or_control & (df['YNAME'] == observation_name)]
        df_drug = df_drug.loc[df_drug['OBS'] != '.'].astype({'OBS': float})

        # Build the ID mask from df_drug itself so that it has the same index
        # as the frame it filters.
        df_sample = df_drug.loc[df_drug['ID'].isin(samples)]

        # Keep the observation's unit in its own variable. It used to be
        # stored in `dose_unit`, which overwrote the dose unit that the
        # other plotting cells use to find the dose column.
        observation_unit = df_drug['UNIT'].values[0]

        x_label = "Time, hours"
        y_label = observation_name + ", " + observation_unit
        tick_distance = 48

        fig = px.line(
            df_sample,
            title=drug + " Data: " + observation_name,
            x="TIME",
            y="OBS",
            line_group="ID",
            color="DOSE",
            width=700,
            height=500,
            hover_data=list(df_sample.columns))

        fig.update_xaxes(title_text=x_label, tick0=0, dtick=tick_distance)
        fig.update_yaxes(title_text=y_label)
        fig.update_traces(mode='markers+lines')
        fig.show()

### Simulated data

The simulated data set is created from the Friberg model with combined multiplicative and constant noise. The more complex combined model is used to generate this data over the other models, as exploring this model would provide information on whether this model can be identified over the less complex noise models when the combined noise is known to have produced the data. The parameters used to produce the data are similar to those inferred in section \ref{section:parameter-inference} to ensure it has similar properties to the data. This data is shown in Figure [???]. 

In [59]:
# Show the PK parameters stored on disk. They are printed for reference only:
# the values used from here on are the ones set explicitly below.
print(np.load('../simulated_parameters_actual_dose2.npy'))

# PK parameters used for the simulation. The cells that follow pass all but
# the last entry to the PK model and use the last entry as the standard
# deviation of the multiplicative noise.
PK_params = [0.68, 2.8, 4.9, 2.3, 0.091]
np.save("../Data_and_parameters/pk_sim_actual_params_dose_2.npy", PK_params)

# PD parameters, shown together with their names.
PD_param_names = ["Circ_0", "MTT", "gamma", "slope", "sigma_base", "eta", "sigma_rel"]
PD_params = np.load("../Data_and_parameters/pd_sim_actual_params_dose_2.npy")
print(PD_param_names, PD_params, sep="\n")

[1.  2.5 0.5 2.3]
['Circ_0', 'MTT', 'gamma', 'slope', 'sigma_base', 'eta', 'sigma_rel']
[9.831e+02 8.526e+01 4.400e-01 2.000e-02 4.243e+01 1.000e+00 1.300e-01]


In [73]:
# Create the simulated PK data

PK_SIM_SEED = 0  # fixed seed so the simulated data set is reproducible
data_times = [0, 0.083, 0.333,  2.5, 4.666, 4.833, 5]  # To follow the times in the real data

pk_sigma = PK_params[-1]  # standard deviation of the multiplicative noise
noise = np.random.default_rng(PK_SIM_SEED).normal(0, pk_sigma, len(data_times))
# NOTE(review): the first two arguments are presumably the dose and the
# number of compartments - confirm against Code.PK_model.
values_no_noise = PK_result(2.0, 2, PK_params[:-1], data_times)[:,0]
values_noisy = values_no_noise*(1+noise)

# The point at time 0 is simulated but not kept as an observation.
# The frame gets its own name so that `df`, the experimental data used by the
# cells above, is not overwritten.
df_PK_sim = pandas.DataFrame({'TIME' : data_times[1:], 'OBS' : values_noisy[1:]})

# df_PK_sim.to_csv(r'../Data_and_parameters/PK_sim/sythesised_data_real_timepoints.csv', index = False)

# Dense noise-free curve for the plot
more_times = np.linspace(0,max(data_times),1000)
more_values = PK_result(2.0, 2, PK_params[:-1], more_times)[:,0]

# Visualise the data
x_label = "Time, hours"
y_label = "Drug concentration, mg/L"
fig = px.scatter(
    df_PK_sim,
    x='TIME',
    y='OBS',
    width=500,
    height=360,
    color_discrete_sequence=px.colors.qualitative.Plotly[1:],
    hover_data=list(df_PK_sim.columns))

# px.scatter hides the legend entry of a single trace; show and name it.
fig['data'][0]['showlegend']=True
fig['data'][0]['name']='Simulated<br>data'
fig.update_xaxes(title_text=x_label)
fig.update_yaxes(title_text=y_label)
fig.update_layout(yaxis_type="log")

fig.add_trace(
    go.Scatter(
        x=more_times,
        y=more_values,
        mode="lines",
        line=go.scatter.Line(color=px.colors.qualitative.Plotly[0]),
        name="Noiseless<br>Simulation",
    )
)
# Shaded band of one noise standard deviation around the noiseless curve,
# drawn as a closed polygon: upper edge forwards, lower edge backwards.
fig.add_trace(go.Scatter(
    x=np.concatenate((more_times, more_times[::-1])),
    y=np.concatenate((
        more_values*(1+pk_sigma),
        more_values[::-1]*(1-pk_sigma)
    )),
    marker = dict(color=px.colors.qualitative.Plotly[0]),
    fill='toself',
    mode='lines',
    line=dict(width=0),
    name="Noise",
))
fig.show()


In [None]:
# Create the simulated PD data

# NOTE(review): PD_friberg_result is not imported anywhere in this notebook.
# Add the import to the first cell (the defining module is not visible from
# here) before running this cell.

PD_SIM_SEED = 0  # fixed seed so the simulated data set is reproducible
# NOTE(review): these time points were copied from the PK cell - confirm that
# they are the intended observation times for the PD data.
data_times = [0, 0.083, 0.333,  2.5, 4.666, 4.833, 5]  # To follow the times in the real data

# The last three PD parameters describe the noise, in the order given by
# PD_param_names: "sigma_base", "eta", "sigma_rel".
sigma_base, eta, sigma_rel = PD_params[-3:]

# Model parameters: the PK parameters followed by the PD parameters, both
# without their noise terms. The two sequences are converted to lists before
# joining them, because `list + ndarray` would add the values element-wise
# instead of concatenating them.
friberg_params = list(PK_params[:-1]) + list(PD_params[:-3])

pd_rng = np.random.default_rng(PD_SIM_SEED)
base_noise = pd_rng.normal(0, sigma_base, len(data_times))
rel_noise = pd_rng.normal(0, sigma_rel, len(data_times))
values_no_noise = PD_friberg_result(2.0, 2, friberg_params, data_times, num_transit=3)[:,-1]
# Combined noise model: relative noise scaled by value**eta plus constant noise.
values_noisy = values_no_noise + np.power(values_no_noise, eta)*rel_noise + base_noise

# The frame gets its own name so that `df`, the experimental data used by the
# cells above, is not overwritten.
df_PD_sim = pandas.DataFrame({'TIME' : data_times[1:], 'OBS' : values_noisy[1:]})

# df_PD_sim.to_csv(r'../Data_and_parameters/PK_sim/sythesised_data_real_timepoints.csv', index = False)

# Dense noise-free curve of the same model output that was used for the data
more_times = np.linspace(0,max(data_times),1000)
more_values = PD_friberg_result(2.0, 2, friberg_params, more_times, num_transit=3)[:,-1]
# Standard deviation of the combined noise; the two noise terms are drawn
# independently, so their variances add.
noise_std = np.sqrt(sigma_base**2 + (sigma_rel*np.power(more_values, eta))**2)

# Visualise the data
x_label = "Time, hours"
# NOTE(review): presumably the circulating cell count, going by the parameter
# name "Circ_0" - confirm the quantity and add its unit.
y_label = "Circulating cells"
fig = px.scatter(
    df_PD_sim,
    x='TIME',
    y='OBS',
    width=500,
    height=360,
    color_discrete_sequence=px.colors.qualitative.Plotly[1:],
    hover_data=list(df_PD_sim.columns))

# px.scatter hides the legend entry of a single trace; show and name it.
fig['data'][0]['showlegend']=True
fig['data'][0]['name']='Simulated<br>data'
fig.update_xaxes(title_text=x_label)
fig.update_yaxes(title_text=y_label)
fig.update_layout(yaxis_type="log")

fig.add_trace(
    go.Scatter(
        x=more_times,
        y=more_values,
        mode="lines",
        line=go.scatter.Line(color=px.colors.qualitative.Plotly[0]),
        name="Noiseless<br>Simulation",
    )
)
# Shaded band of one noise standard deviation around the noiseless curve,
# drawn as a closed polygon: upper edge forwards, lower edge backwards.
fig.add_trace(go.Scatter(
    x=np.concatenate((more_times, more_times[::-1])),
    y=np.concatenate((
        more_values + noise_std,
        (more_values - noise_std)[::-1]
    )),
    marker = dict(color=px.colors.qualitative.Plotly[0]),
    fill='toself',
    mode='lines',
    line=dict(width=0),
    name="Noise",
))
fig.show()

Next Notebook: [PK model - Naive Pooled Inference](PK_naive_pooled_inference.ipynb)