In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

import warnings
warnings.filterwarnings('ignore')

# Both input CSV files are read from the local `data` directory.
mouse_drug_path, clinical_path = (
    os.path.join('data', csv_name)
    for csv_name in ('mouse_drug_data.csv', 'clinicaltrial_data.csv')
)

# Default size applied to every figure created after this point.
plt.rcParams.update({'figure.figsize': [10, 7.5]})

# Load the two CSV files into DataFrames.
mouse_df, clinical_df = pd.read_csv(mouse_drug_path), pd.read_csv(clinical_path)

In [2]:
# Join the clinical measurements (left) with the mouse/drug table (right) on
# 'Mouse ID'. An inner join is pandas' default, spelled out here for clarity.
df = pd.merge(clinical_df, mouse_df, how='inner', on='Mouse ID')
df.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin


## Tumor Response to Treatment

In [3]:
# Group every measurement by treatment and timepoint; later cells reuse
# this grouped object for the other metrics.
grouped_df = df.groupby(by=['Drug', 'Timepoint'])

# Per-group mean and standard error of the mean of the tumor volume,
# each kept as a one-column DataFrame.
tumor_volume_groups = grouped_df['Tumor Volume (mm3)']
mean_tumor_volume = tumor_volume_groups.mean().to_frame()
stderr_tumor_volume = tumor_volume_groups.sem().to_frame()

mean_tumor_volume.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,45.0
Capomulin,5,44.266086
Capomulin,10,43.084291
Capomulin,15,42.064317
Capomulin,20,40.716325


In [4]:
# Pivot the per-(Drug, Timepoint) means into a table with one row per
# Timepoint and one column per Drug. Unstacking the 'Drug' index level of the
# value column does this in one step and does not depend on the
# auto-generated 'level_0' label that the transpose/reset_index route needs.
mtv = mean_tumor_volume['Tumor Volume (mm3)'].unstack(level='Drug')
mtv

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334
25,39.939528,54.287674,55.715252,57.678982,56.731968,57.482574,55.504138,38.9743,56.166123,55.432935
30,38.769339,56.769517,58.299397,60.994507,59.559509,59.809063,58.196374,38.703137,59.826738,57.713531
35,37.816839,58.827548,60.742461,63.371686,62.685087,62.420615,60.350199,37.451996,62.440699,60.089372
40,36.958001,61.467895,63.162824,66.06858,65.600754,65.052675,63.045537,36.574081,65.356386,62.916692
45,36.236114,64.132421,65.755562,70.662958,69.265506,68.084082,66.258529,34.955595,68.43831,65.960888


In [5]:
# Pivot the per-(Drug, Timepoint) standard errors into a table with one row
# per Timepoint and one column per Drug. Unstacking the 'Drug' index level of
# the value column does this in one step and does not depend on the
# auto-generated 'level_0' label that the transpose/reset_index route needs.
mtv_stderr = stderr_tumor_volume['Tumor Volume (mm3)'].unstack(level='Drug')
mtv_stderr

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.448593,0.164505,0.235102,0.264819,0.202385,0.218091,0.231708,0.482955,0.239862,0.18895
10,0.702684,0.236144,0.282346,0.357421,0.319415,0.402064,0.376195,0.720225,0.433678,0.263949
15,0.838617,0.332053,0.357705,0.580268,0.444378,0.614461,0.466109,0.770432,0.493261,0.370544
20,0.909731,0.359482,0.47621,0.726484,0.59526,0.839609,0.555181,0.786199,0.621889,0.533182
25,0.881642,0.439356,0.550315,0.755413,0.813706,1.034872,0.577401,0.746991,0.741922,0.602513
30,0.93446,0.49062,0.631061,0.934121,0.975496,1.218231,0.746045,0.864906,0.899548,0.800043
35,1.052241,0.692248,0.984155,1.127867,1.013769,1.287481,1.084929,0.967433,1.003186,0.881426
40,1.223608,0.708505,1.05522,1.158449,1.118567,1.370634,1.564779,1.128445,1.410435,0.998515
45,1.223977,0.902358,1.144427,1.453186,1.416363,1.351726,1.888586,1.226805,1.576556,1.003576


In [6]:
# Drugs compared in the plots that follow.
relevant_drugs = ['Capomulin', 'Infubinol', 'Ketapril', 'Placebo']

# Keep only those drugs' columns from the mean and standard-error tables.
data, data_err = mtv[relevant_drugs], mtv_stderr[relevant_drugs]

In [None]:
# Plot the selected drugs' mean tumor volume against Timepoint, using the
# matching standard errors as error bars.
data.plot(style='o-', yerr=data_err, alpha=0.75);
plt.grid();
# 'outer' is not a legend location Matplotlib recognises; 'best' is valid and
# matches the legend placement used by the other plots in this notebook.
plt.legend(loc='best');
plt.ylabel('Tumor Volume (mm3)');
plt.xlabel('Time (days)');
plt.title('Tumor Response to Treatment');

save_path = os.path.join('Assets', 'TumorResponseToTreatment.png')
# Create the output folder if needed so saving works on a fresh checkout.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
plt.savefig(save_path)

## Metastatic Response to Treatment

In [None]:
# Per-(Drug, Timepoint) mean and standard error of the mean of the
# 'Metastatic Sites' column, each kept as a one-column DataFrame.
met_site_groups = grouped_df['Metastatic Sites']
mean_met_site = met_site_groups.mean().to_frame()
stderr_met_site = met_site_groups.sem().to_frame()

mean_met_site.head()

In [None]:
# Preview the first five rows of the standard-error table.
stderr_met_site.head(n=5)

In [None]:
# Pivot both metastatic-site tables to one row per Timepoint and one column
# per Drug. Unstacking the 'Drug' index level of the value column does this in
# one step and does not depend on the auto-generated 'level_0' label that the
# transpose/reset_index route needs.
met_sites = mean_met_site['Metastatic Sites'].unstack(level='Drug')
met_sites_stderr = stderr_met_site['Metastatic Sites'].unstack(level='Drug')

met_sites

In [None]:
# Narrow the metastatic-site tables to the drugs being compared.
data, data_err = met_sites[relevant_drugs], met_sites_stderr[relevant_drugs]

# Style the plot through the Axes that pandas returns rather than through
# pyplot's implicit "current axes".
ax = data.plot(yerr=data_err, alpha=0.75)
ax.grid()
ax.legend(loc='best')
ax.set_ylabel('Met. Sites')
ax.set_xlabel('Treatment Duration (days)')
ax.set_title('Metastatic Spread During Treatment')

save_path = os.path.join('Assets', 'MetastaticResponseToTreatment.png')
ax.figure.savefig(save_path)

## Survival Rates

In [None]:
# Number of non-null 'Mouse ID' entries in each (Drug, Timepoint) group.
# NOTE(review): this counts rows, not distinct mice; if a Mouse ID can appear
# more than once within a group, nunique() may be what is wanted; verify.
mouse_id_groups = grouped_df['Mouse ID']
mice_count = mouse_id_groups.count().to_frame()
mice_count.head()

In [None]:
# Pivot the per-(Drug, Timepoint) counts into a table with one row per
# Timepoint and one column per Drug. Unstacking the 'Drug' index level of the
# value column does this in one step and does not depend on the
# auto-generated 'level_0' label that the transpose/reset_index route needs.
mice = mice_count['Mouse ID'].unstack(level='Drug')
mice

In [None]:
# NOTE(review): this cell appears to be a leftover placeholder. It contains no
# plotting or saving code, only the outline comments below and a bare
# plt.show(). The survival plot itself is built and saved in the cells that
# follow, so this cell is a candidate for removal.

# Generate the Plot (Accounting for percentages)

# Save the Figure

# Show the Figure
plt.show()

In [None]:
# Express every count as a percentage of the same drug's count in the first
# row of `mice`. Dividing the whole frame by its first row (aligned on the
# column labels) replaces the cell-by-cell loop and yields a float table
# directly, instead of writing floats one at a time into an integer copy.
mice_percent = mice.div(mice.iloc[0], axis='columns') * 100

mice_percent

In [None]:
# Keep only the compared drugs' survival-percentage columns.
data = mice_percent[relevant_drugs]

In [None]:
# All traces share the table's index as their x values.
x = data.index

# One step trace per drug, drawn in the same order and with the same labels.
for drug in ('Capomulin', 'Infubinol', 'Ketapril', 'Placebo'):
    plt.step(x, data[drug], label=drug)

# Decorate the axes the traces were drawn on.
ax = plt.gca()
ax.grid()
ax.set_ylabel('Survival Rate (%)')
ax.set_xlabel('Time (Days)')
ax.set_title('Survival During Treatment')
ax.legend(loc='best')

save_path = os.path.join('Assets', 'SurvivalRates.png')
ax.figure.savefig(save_path)

## Summary Bar Graph

In [None]:
# Mean tumor volume per drug (rows), with one column per timepoint.
mean_volume = grouped_df['Tumor Volume (mm3)'].mean()
drugs = mean_volume.to_frame().unstack()

# Percentage change from the first timepoint column to the last one.
first_volume = drugs.iloc[:, 0]
last_volume = drugs.iloc[:, -1]
drug_response = (last_volume - first_volume) / first_volume * 100
drug_response

In [None]:
# Choose only the drugs compared throughout the notebook. The selection gets
# its own name so that the `relevant_drugs` list of names is not replaced by a
# Series, which would break re-running earlier cells that use that list as a
# column selector.
relevant_response = drug_response.loc[relevant_drugs]

# Separate colors for the bar graph: green when the change is zero or
# negative, red when the tumor volume increased.
colors = ['green' if change <= 0 else 'red' for change in relevant_response]


def label_bars(rects, ax=None):
    """Write each bar's height on the bar as a whole-number percentage.

    Parameters
    ----------
    rects : iterable of bar patches
        For example the container returned by ``Axes.bar``.
    ax : matplotlib Axes, optional
        Axes to draw the text on. Defaults to the current axes, so existing
        single-argument calls keep working.

    The label is centred horizontally on the bar and anchored at y=5 for bars
    with positive height and at y=-5 otherwise.
    """
    if ax is None:
        ax = plt.gca()
    for rect in rects:
        height = rect.get_height()
        # Anchor the text just inside the bar, on the side of zero the bar grows toward.
        y_text = 5 if height > 0 else -5
        ax.text(rect.get_x() + rect.get_width()/2., y_text,
                f'{height:.0f}%', ha='center', va='bottom', color='white')


fig, ax = plt.subplots()
bars = ax.bar(x=relevant_response.index, height=relevant_response, color=colors, width=1)
ax.grid(axis='y')
ax.set_ylabel('% Tumor Volume Change')
plt.xticks(rotation='horizontal')
ax.set_title('Tumor Change Over 45 Day Treatment')
label_bars(bars, ax)

save_path = os.path.join('Assets', 'SummaryBarGraph.png')
fig.savefig(save_path)