In [1]:
## color_gen function was taken from inneka.com programming tutorial ##
# Dependencies and Setup
import pandas as pd
import numpy as np
import bokeh.plotting as bp
from bokeh.palettes import Category10
from bokeh.models import ColumnDataSource, Whisker
#from bokeh.io import export_png
import itertools

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# This will be used to loop through color scheme
def color_gen():
    """Yield the colors of the 10-entry Category10 palette in order, repeating forever."""
    yield from itertools.cycle(Category10[10])

# Input CSV locations (relative paths; adjust if the data folder moves)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Load the mouse-to-drug assignments and the clinical trial measurements
mouse_df = pd.read_csv(mouse_drug_data_to_load)
clinic_df = pd.read_csv(clinical_trial_data_to_load)

# Combine both tables by joining on the shared 'Mouse ID' index
# (default left join: every trial row is kept and gains its 'Drug' column)
trial_by_mouse = clinic_df.set_index('Mouse ID')
drug_by_mouse = mouse_df.set_index('Mouse ID')
df = trial_by_mouse.join(drug_by_mouse)

# Display the combined table for preview
df.head()

Unnamed: 0_level_0,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
Mouse ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
a203,0,45.0,0,Infubinol
a203,5,48.508468,0,Infubinol
a203,10,51.852437,1,Infubinol
a203,15,52.77787,1,Infubinol
a203,20,55.173336,1,Infubinol


## Tumor Response to Treatment

In [2]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint
tum_vol_mean = df.groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)'].mean()
# Promote the Series to a single-column DataFrame
tumor_reponse_df = tum_vol_mean.to_frame(name="Tumor Volume (mm3)")
# Preview DataFrame
tumor_reponse_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,45.000000
Capomulin,5,44.266086
Capomulin,10,43.084291
Capomulin,15,42.064317
Capomulin,20,40.716325
...,...,...
Zoniferol,25,55.432935
Zoniferol,30,57.713531
Zoniferol,35,60.089372
Zoniferol,40,62.916692


In [3]:
# Standard error of the mean tumor volume for each (Drug, Timepoint) group
tum_vol_sem = df.groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)'].sem()
# Promote the Series to a single-column DataFrame
tumor_reponse_sem = tum_vol_sem.to_frame(name="Tumor Volume (mm3)")
# Preview DataFrame
tumor_reponse_sem


Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.000000
Capomulin,5,0.448593
Capomulin,10,0.702684
Capomulin,15,0.838617
Capomulin,20,0.909731
...,...,...
Zoniferol,25,0.602513
Zoniferol,30,0.800043
Zoniferol,35,0.881426
Zoniferol,40,0.998515


In [4]:
# Pivot the 'Drug' index level into columns: one row per timepoint, one column per drug
tumor_reponse_df_unstacked = tumor_reponse_df['Tumor Volume (mm3)'].unstack(level='Drug')
tumor_reponse_sem_unstacked = tumor_reponse_sem['Tumor Volume (mm3)'].unstack(level='Drug')

# Preview the reshaped table of means
tumor_reponse_df_unstacked.head()


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334


In [5]:
# Preview the reshaped table of standard errors (same wide layout as the means)
tumor_reponse_sem_unstacked.head(n=5)

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.448593,0.164505,0.235102,0.264819,0.202385,0.218091,0.231708,0.482955,0.239862,0.18895
10,0.702684,0.236144,0.282346,0.357421,0.319415,0.402064,0.376195,0.720225,0.433678,0.263949
15,0.838617,0.332053,0.357705,0.580268,0.444378,0.614461,0.466109,0.770432,0.493261,0.370544
20,0.909731,0.359482,0.47621,0.726484,0.59526,0.839609,0.555181,0.786199,0.621889,0.533182


In [6]:
# Generate the Plot (with Error Bars)
# Palette iterator; later cells also read `color`, so the name is kept.
color = color_gen()

# Drugs to plot, taken from the columns of the unstacked mean table
drug_val_list = list(tumor_reponse_df_unstacked.columns)
timepoints = tumor_reponse_df_unstacked.index.values

# Error-bar geometry per drug. Each value is a flat array with one entry per
# timepoint: 'base' is the x position of the whisker (the timepoint), and
# 'lower'/'upper' are the mean minus/plus one standard error.
drug_val_dict = {}
for drug in drug_val_list:
    mean_vals = tumor_reponse_df_unstacked[drug].values
    sem_vals = tumor_reponse_sem_unstacked[drug].values
    drug_val_dict[drug] = {'base': timepoints,
                           'lower': mean_vals - sem_vals,
                           'upper': mean_vals + sem_vals}

source = ColumnDataSource(tumor_reponse_df_unstacked)

# x_range runs to day 45 so the final timepoint is not clipped
fig = bp.figure(title='Tumor Response to Treatment', x_axis_label='Time (Days)', x_range=(0, 45), y_axis_label='Tumor Volume (mm3)', y_range=(30, 80))

# One line per drug. zip() stops when the drug list is exhausted, so the
# endless palette iterator is only advanced once per drug.
for drug, c in zip(drug_val_list, color):
    fig.line('Timepoint', drug, line_width=2, source=source, color=c)

# One whisker layer per drug, drawn on top of the lines
for drug in drug_val_list:
    source_error = ColumnDataSource(drug_val_dict[drug])
    fig.add_layout(Whisker(source=source_error, upper='upper', lower='lower', base='base', line_width=4, line_color='black', level='overlay'))

# Save a standalone HTML copy of the figure
bp.output_file('Tumor_Response.html', title='Tumor Response to Treatment')
bp.save(fig)

# Switch output back to the notebook and render inline
bp.reset_output()
bp.output_notebook()
bp.show(fig)


## Metastatic Response to Treatment

In [7]:
# Mean number of metastatic sites for each (Drug, Timepoint) group
meta_mean = df.groupby(['Drug', 'Timepoint'])['Metastatic Sites'].mean()
# Promote the Series to a single-column DataFrame
meta_mean_df = meta_mean.to_frame(name="Metastatic Sites")
# Preview DataFrame
meta_mean_df


Unnamed: 0_level_0,Unnamed: 1_level_0,Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.000000
Capomulin,5,0.160000
Capomulin,10,0.320000
Capomulin,15,0.375000
Capomulin,20,0.652174
...,...,...
Zoniferol,25,1.687500
Zoniferol,30,1.933333
Zoniferol,35,2.285714
Zoniferol,40,2.785714


In [8]:
# Standard error of the metastatic-site mean for each (Drug, Timepoint) group
meta_sem = df.groupby(['Drug', 'Timepoint'])['Metastatic Sites'].sem()
# Promote the Series to a single-column DataFrame
meta_sem_df = meta_sem.to_frame(name="Metastatic Sites")
# Preview DataFrame
meta_sem_df


Unnamed: 0_level_0,Unnamed: 1_level_0,Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.000000
Capomulin,5,0.074833
Capomulin,10,0.125433
Capomulin,15,0.132048
Capomulin,20,0.161621
...,...,...
Zoniferol,25,0.236621
Zoniferol,30,0.248168
Zoniferol,35,0.285714
Zoniferol,40,0.299791


In [9]:
# Pivot the 'Drug' index level into columns: one row per timepoint, one column per drug
meta_mean_df_unstacked = meta_mean_df['Metastatic Sites'].unstack(level='Drug')
meta_sem_df_unstacked = meta_sem_df['Metastatic Sites'].unstack(level='Drug')

# Show the reshaped table of means
meta_mean_df_unstacked

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.16,0.380952,0.28,0.304348,0.26087,0.375,0.32,0.12,0.24,0.166667
10,0.32,0.6,0.666667,0.590909,0.52381,0.833333,0.565217,0.25,0.478261,0.5
15,0.375,0.789474,0.904762,0.842105,0.857143,1.25,0.764706,0.333333,0.782609,0.809524
20,0.652174,1.111111,1.05,1.210526,1.15,1.526316,1.0,0.347826,0.952381,1.294118
25,0.818182,1.5,1.277778,1.631579,1.5,1.941176,1.357143,0.652174,1.157895,1.6875
30,1.090909,1.9375,1.588235,2.055556,2.066667,2.266667,1.615385,0.782609,1.388889,1.933333
35,1.181818,2.071429,1.666667,2.294118,2.266667,2.642857,2.3,0.952381,1.5625,2.285714
40,1.380952,2.357143,2.1,2.733333,2.466667,3.166667,2.777778,1.1,1.583333,2.785714
45,1.47619,2.692308,2.111111,3.363636,2.538462,3.272727,2.571429,1.25,1.727273,3.071429


In [10]:
# Preview the reshaped table of standard errors (same wide layout as the means)
meta_sem_df_unstacked.head(n=5)

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.074833,0.108588,0.091652,0.0981,0.093618,0.100947,0.095219,0.066332,0.087178,0.077709
10,0.125433,0.152177,0.159364,0.142018,0.163577,0.115261,0.10569,0.090289,0.123672,0.109109
15,0.132048,0.180625,0.194015,0.191381,0.158651,0.190221,0.136377,0.115261,0.153439,0.111677
20,0.161621,0.241034,0.234801,0.23668,0.181731,0.234064,0.171499,0.11943,0.200905,0.166378


In [12]:
# Generate the Plot (with Error Bars)
# Restart the palette so the drug-to-color assignment does not depend on how
# many colors earlier cells happened to consume.
color = color_gen()

# Drugs to plot, taken from the columns of the unstacked mean table
drug_meta_list = list(meta_mean_df_unstacked.columns)
meta_timepoints = meta_mean_df_unstacked.index.values

# Error-bar geometry per drug. Each value is a flat array with one entry per
# timepoint: 'base' is the x position of the whisker (the timepoint), and
# 'lower'/'upper' are the mean minus/plus one standard error.
drug_meta_dict = {}
for drug in drug_meta_list:
    mean_vals = meta_mean_df_unstacked[drug].values
    sem_vals = meta_sem_df_unstacked[drug].values
    drug_meta_dict[drug] = {'base': meta_timepoints,
                            'lower': mean_vals - sem_vals,
                            'upper': mean_vals + sem_vals}

source_meta = ColumnDataSource(meta_mean_df_unstacked)

# x_range runs to day 45 so the final timepoint is not clipped
fig_2 = bp.figure(title='Metastatic Spread During Treatment', x_axis_label='Treatment Duration (Days)', x_range=(0, 45), y_axis_label='Met. Sites', y_range=(0, 4))

# One whisker layer per drug
for drug in drug_meta_list:
    source_error_meta = ColumnDataSource(drug_meta_dict[drug])
    fig_2.add_layout(Whisker(source=source_error_meta, upper='upper', lower='lower', base='base', line_width=3))

# One line per drug; legend_label creates the legend entries that the
# legend.location setting below applies to.
for drug, c in zip(drug_meta_list, color):
    fig_2.line('Timepoint', drug, line_width=2, source=source_meta, color=c, legend_label=drug)

fig_2.legend.location = "top_left"

# Save a standalone HTML copy of the figure
bp.output_file('Metastatic_Analysis.html', title='Metastatic Spread During Treatment')
bp.save(fig_2)

# Switch output back to the notebook and render inline
bp.reset_output()
bp.output_notebook()
bp.show(fig_2)


## Survival Rates

In [13]:
# Store the Count of Mice Grouped by Drug and Timepoint.
# 'Mouse ID' is the index of df, so it is turned into a column on a temporary
# copy; df itself is left unchanged so this cell gives the same result when
# re-run and earlier cells keep seeing the frame they were written against.
mouse_count = df.reset_index().groupby(['Drug', 'Timepoint'])['Mouse ID'].count()
# Convert to DataFrame
mouse_count_df = pd.DataFrame({'Mouse ID': mouse_count})
# Preview with a friendlier header. The rename is for display only: the stored
# frame keeps the 'Mouse ID' column name.
mouse_count_df.rename(columns={'Mouse ID': 'Mouse Count'})


Unnamed: 0_level_0,Unnamed: 1_level_0,Mouse Count
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,25
Capomulin,5,25
Capomulin,10,25
Capomulin,15,24
Capomulin,20,23
...,...,...
Zoniferol,25,16
Zoniferol,30,15
Zoniferol,35,14
Zoniferol,40,14


In [14]:
# Pivot the 'Drug' index level into columns: one row per timepoint, one column per drug
mouse_count_df_unstacked = mouse_count_df['Mouse ID'].unstack(level='Drug')

# Show the reshaped table of mouse counts
mouse_count_df_unstacked

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17
25,22,18,18,19,18,17,14,23,19,16
30,22,16,17,18,15,15,13,23,18,15
35,22,14,12,17,15,14,10,21,16,14
40,21,14,10,15,15,12,9,20,12,14
45,21,13,9,11,13,11,7,20,11,14


In [15]:
# Express each count as a percentage of that drug's count in the first row
# (timepoint 0), rounded to two decimals
initial_counts = mouse_count_df_unstacked.iloc[0]
survival_fraction = mouse_count_df_unstacked.div(initial_counts, axis='columns')
mouse_percent_df_unstacked = (survival_fraction * 100).round(2)

# Show the percentage table
mouse_percent_df_unstacked

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
5,100.0,84.0,100.0,92.0,92.0,96.0,96.15,100.0,96.15,96.0
10,100.0,80.0,84.0,88.0,84.0,96.0,88.46,96.0,88.46,88.0
15,96.0,76.0,84.0,76.0,84.0,80.0,65.38,96.0,88.46,84.0
20,92.0,72.0,80.0,76.0,80.0,76.0,65.38,92.0,80.77,68.0
25,88.0,72.0,72.0,76.0,72.0,68.0,53.85,92.0,73.08,64.0
30,88.0,64.0,68.0,72.0,60.0,60.0,50.0,92.0,69.23,60.0
35,88.0,56.0,48.0,68.0,60.0,56.0,38.46,84.0,61.54,56.0
40,84.0,56.0,40.0,60.0,60.0,48.0,34.62,80.0,46.15,56.0
45,84.0,52.0,36.0,44.0,52.0,44.0,26.92,80.0,42.31,56.0


In [17]:
# Generate the Plot (Accounting for percentages)
# Restart the palette so the drug-to-color assignment does not depend on how
# many colors earlier cells happened to consume.
color = color_gen()

# Plot every drug column of the survival table itself, rather than relying on
# a drug list built by another cell.
survival_drugs = list(mouse_percent_df_unstacked.columns)
source_surv = ColumnDataSource(mouse_percent_df_unstacked)

# y_range starts at 20 so survival rates below 40% are not cut off
fig_3 = bp.figure(title='Survival During Treatment', x_axis_label='Time (Days)', x_range=(0, 50), y_axis_label='Survival Rate(%)', y_range=(20, 100))

# One line per drug; zip() stops when the drug list is exhausted
for drug, c in zip(survival_drugs, color):
    fig_3.line('Timepoint', drug, line_width=2, source=source_surv, color=c)

# Save a standalone HTML copy of the figure
bp.output_file('Survival_Rate.html', title='Survival During Treatment')
bp.save(fig_3)

# Switch output back to the notebook and render inline
bp.reset_output()
bp.output_notebook()
bp.show(fig_3)


## Summary Bar Graph

In [18]:
# Percent change in mean tumor volume per drug, from the first to the last timepoint
first_volume = tumor_reponse_df_unstacked.iloc[0]
last_volume = tumor_reponse_df_unstacked.iloc[-1]
percent_change = (last_volume - first_volume) / first_volume * 100

# Keep the four drugs of interest and turn the Series into a Drug / Percent Change table
selected_drugs = ['Capomulin', 'Infubinol', 'Ketapril', 'Placebo']
percent_change_df = (
    pd.DataFrame(percent_change)
    .loc[selected_drugs]
    .rename(columns={0: 'Percent Change'})
    .reset_index()
)

# Display the data to confirm
percent_change_df


Unnamed: 0,Drug,Percent Change
0,Capomulin,-19.475303
1,Infubinol,46.123472
2,Ketapril,57.028795
3,Placebo,51.29796


In [19]:
# Bar chart of the percent change in tumor volume for the selected drugs
source_sum = ColumnDataSource(percent_change_df)

# Categorical x axis: one bar per drug name
final_fig = bp.figure(x_range=percent_change_df['Drug'], plot_height=500, title="Tumor Change Over 45 Day Treatment")
final_fig.vbar(x='Drug', top='Percent Change', width=0.9, source=source_sum, legend_field="Drug")

final_fig.xgrid.grid_line_color = None
final_fig.y_range.start = -20
final_fig.y_range.end = 60
final_fig.legend.orientation = "vertical"
final_fig.legend.location = "top_right"

# Save the Figure. This is the only output target for the chart; an earlier
# output_file('Final_Summary.html') call was dropped because it was overridden
# before anything was saved to it.
bp.output_file('Summary_Report.html', title='Tumor Change Over 45 Day Treatment')
bp.save(final_fig)

# Show the Figure inline in the notebook
bp.reset_output()
bp.output_notebook()
bp.show(final_fig)



## OBSERVATION:

1.	Of the medications given to the mice, Capomulin and Ramicane were the most successful. Not only did they have the highest number of surviving mice at the end of the study, but they also slowed the growth of the tumors.
2.	The least successful drug was Propriva. Though it did slow the growth of the tumor more than some of the other drugs, the rate of fatalities among the mice was quite high. By the end of the 45-day study, only about 27% of the mice were left. There was a drastic jump in fatalities among the mice between the 10th and 15th day of the study.
3.	Infubinol and Ketapril were the other failed drugs. Mice that were given Ketapril developed the highest number of metastatic sites and had a survival rate of 44%, which is the same as the placebo. Mice that were given Infubinol had one of the slowest rates of metastatic growth but experienced the second-highest number of deaths.
