In [10]:
# Dependencies and Setup
%matplotlib notebook
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import sem



# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
mouse_df = pd.read_csv("mouse_drug_data.csv")
mouse_df.head()

Unnamed: 0,Mouse ID,Drug
0,f234,Stelasyn
1,x402,Stelasyn
2,a492,Stelasyn
3,w540,Stelasyn
4,v764,Stelasyn


In [11]:
clinical_df = pd.read_csv("clinicaltrial_data.csv")

clinical_df.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.0,0
1,f932,0,45.0,0
2,g107,0,45.0,0
3,a457,0,45.0,0
4,c819,0,45.0,0


In [12]:
# Combine the data into a single dataset
combined_mouse_df = pd.merge(mouse_df, clinical_df, on="Mouse ID")
combined_mouse_df.head()

# Display the data table for preview


Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


## Tumor Response to Treatment

In [13]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 
Tumor_group = combined_mouse_df.groupby(['Drug', 'Timepoint'])
Tumor_group.head()



Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.000000,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2
...,...,...,...,...,...
1780,x773,Placebo,35,55.192736,4
1781,x773,Placebo,40,55.987676,4
1782,x773,Placebo,45,58.634971,4
1802,y478,Placebo,40,65.971120,1


In [14]:
average_tumor_group = Tumor_group.mean()
# average_tumor_group = average_tumor_group.rename(columns={"Tumor Volume (mm3)":"Average Tumor Volume (mm3)"})

# Convert to DataFrame
average_tumor_group= pd.DataFrame(average_tumor_group)
# Preview DataFrame
average_tumor_group = average_tumor_group.reset_index()
average_tumor_group.head()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,Capomulin,0,45.0,0.0
1,Capomulin,5,44.266086,0.16
2,Capomulin,10,43.084291,0.32
3,Capomulin,15,42.064317,0.375
4,Capomulin,20,40.716325,0.652174


In [15]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
sem_tumor_group =pd.DataFrame(Tumor_group.sem())

sem_tumor_group = sem_tumor_group.reset_index()
# Convert to DataFrame
sem_tumor_group.head()
# Preview DataFrame



Unnamed: 0,Drug,Timepoint,Metastatic Sites,Mouse ID,Tumor Volume (mm3)
0,Capomulin,0,0.0,,0.0
1,Capomulin,5,0.074833,,0.448593
2,Capomulin,10,0.125433,,0.702684
3,Capomulin,15,0.132048,,0.838617
4,Capomulin,20,0.161621,,0.909731


In [16]:
# Minor Data Munging to Re-Format the Data Frames
tumor_mean = average_tumor_group.pivot(index="Timepoint", columns="Drug", values= "Tumor Volume (mm3)")
tumor_error = sem_tumor_group.pivot(index="Timepoint", columns="Drug", values="Tumor Volume (mm3)")
# Preview that Reformatting worked
print(tumor_mean.head())
print(tumor_error.head())

Drug       Capomulin   Ceftamin  Infubinol   Ketapril   Naftisol    Placebo  \
Timepoint                                                                     
0          45.000000  45.000000  45.000000  45.000000  45.000000  45.000000   
5          44.266086  46.503051  47.062001  47.389175  46.796098  47.125589   
10         43.084291  48.285125  49.403909  49.582269  48.694210  49.423329   
15         42.064317  50.094055  51.296397  52.399974  50.933018  51.359742   
20         40.716325  52.157049  53.197691  54.920935  53.644087  54.364417   

Drug        Propriva   Ramicane   Stelasyn  Zoniferol  
Timepoint                                              
0          45.000000  45.000000  45.000000  45.000000  
5          47.248967  43.944859  47.527452  46.851818  
10         49.101541  42.531957  49.463844  48.689881  
15         51.067318  41.495061  51.529409  50.779059  
20         53.346737  40.238325  54.067395  53.170334  
Drug       Capomulin  Ceftamin  Infubinol  Ketapril  N

In [17]:
# Generate the Plot (with Error Bars)
fig, ax=plt.subplots()
fig.suptitle("Tumor Response to Treatment", fontsize=10, fontweight="bold")
ax.errorbar(tumor_mean.index, tumor_mean["Capomulin"], tumor_error["Capomulin"], fmt="o", color="tab:blue", alpha=0.5, label="Capomulin")
ax.errorbar(tumor_mean.index, tumor_mean["Ceftamin"], tumor_error["Ceftamin"], fmt="o", color="tab:brown", alpha=0.5, label="Ceftamin")
ax.errorbar(tumor_mean.index, tumor_mean["Infubinol"], tumor_error["Infubinol"], fmt="o", color="tab:orange", alpha=0.5, label="Infubinol")
ax.errorbar(tumor_mean.index, tumor_mean["Ketapril"], tumor_error["Ketapril"], fmt="o", color="tab:pink", alpha=0.5, label="Ketapril")
ax.errorbar(tumor_mean.index, tumor_mean["Naftisol"], tumor_error["Naftisol"], fmt="o", color="tab:green", alpha=0.5, label="Naftisol")
ax.errorbar(tumor_mean.index, tumor_mean["Propriva"], tumor_error["Propriva"], fmt="o", color="tab:gray", alpha=0.5, label="Propiva")
ax.errorbar(tumor_mean.index, tumor_mean["Ramicane"], tumor_error["Ramicane"], fmt="o", color="tab:red", alpha=0.5, label="Ramicane")
ax.errorbar(tumor_mean.index, tumor_mean["Stelasyn"], tumor_error["Stelasyn"], fmt="o", color="tab:olive", alpha=0.5, label="Stelasyn")
ax.errorbar(tumor_mean.index, tumor_mean["Zoniferol"], tumor_error["Zoniferol"], fmt="o", color="tab:purple", alpha=0.5, label="Zoniferol")
ax.errorbar(tumor_mean.index, tumor_mean["Placebo"], tumor_error["Placebo"], fmt="o", color="tab:cyan", alpha=0.5, label="Placebo")

ax.set_xlim(0, 50)
ax.set_ylim(35, 75)
ax.set_xlabel ("Time (Days)")
ax.set_ylabel ("Tumor Volume (mm3)")
plt.legend(loc="best", fontsize="small", fancybox=True)
plt.grid()
plt.show()
# Save the Figure



<IPython.core.display.Javascript object>

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [21]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint 
met_sites= average_tumor_group.reset_index()
met_sites= met_sites[["Drug", "Timepoint", "Metastatic Sites"]]

# Convert to DataFrame
met_sites=pd.DataFrame(met_sites)
# Preview DataFrame
met_sites.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.16
2,Capomulin,10,0.32
3,Capomulin,15,0.375
4,Capomulin,20,0.652174


In [22]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 
error_sites=sem_tumor_group
error_sites=error_sites.reset_index()
error_sites=error_sites[["Drug", "Timepoint", "Metastatic Sites"]]
# Convert to DataFrame
# error_sites=pd.DataFrame(error_sites)
# Preview DataFrame
error_sites.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.074833
2,Capomulin,10,0.125433
3,Capomulin,15,0.132048
4,Capomulin,20,0.161621


In [23]:
# Minor Data Munging to Re-Format the Data Frames
site_mean = met_sites.pivot(index="Timepoint", columns= "Drug", values="Metastatic Sites")
site_error = error_sites.pivot(index="Timepoint", columns="Drug", values="Metastatic Sites")
# Preview that Reformatting worked
print(site_mean.head())
print(site_error.head())

Drug       Capomulin  Ceftamin  Infubinol  Ketapril  Naftisol   Placebo  \
Timepoint                                                                 
0           0.000000  0.000000   0.000000  0.000000  0.000000  0.000000   
5           0.160000  0.380952   0.280000  0.304348  0.260870  0.375000   
10          0.320000  0.600000   0.666667  0.590909  0.523810  0.833333   
15          0.375000  0.789474   0.904762  0.842105  0.857143  1.250000   
20          0.652174  1.111111   1.050000  1.210526  1.150000  1.526316   

Drug       Propriva  Ramicane  Stelasyn  Zoniferol  
Timepoint                                           
0          0.000000  0.000000  0.000000   0.000000  
5          0.320000  0.120000  0.240000   0.166667  
10         0.565217  0.250000  0.478261   0.500000  
15         0.764706  0.333333  0.782609   0.809524  
20         1.000000  0.347826  0.952381   1.294118  
Drug       Capomulin  Ceftamin  Infubinol  Ketapril  Naftisol   Placebo  \
Timepoint                   

In [24]:
# Generate the Plot (with Error Bars)
fig, ax= plt.subplots()
fig.suptitle("Metastatic Response to Treatment", fontsize=10, fontweight="bold")
ax.errorbar(site_mean.index, site_mean["Capomulin"], site_error["Capomulin"], fmt="o", color="tab:blue", alpha=0.5, label="Capomulin")
ax.errorbar(site_mean.index, site_mean["Ceftamin"], site_error["Ceftamin"], fmt="o", color="tab:brown", alpha=0.5, label="Ceftamin")
ax.errorbar(site_mean.index, site_mean["Infubinol"], site_error["Infubinol"], fmt="o", color="tab:orange", alpha=0.5, label="Infubinol")
ax.errorbar(site_mean.index, site_mean["Ketapril"], site_error["Ketapril"], fmt="o", color="tab:pink", alpha=0.5, label="Ketapril")
ax.errorbar(site_mean.index, site_mean["Naftisol"], site_error["Naftisol"], fmt="o", color="tab:green", alpha=0.5, label="Naftisol")
ax.errorbar(site_mean.index, site_mean["Propriva"], site_error["Propriva"], fmt="o", color="tab:gray", alpha=0.5, label="Propiva")
ax.errorbar(site_mean.index, site_mean["Ramicane"], site_error["Ramicane"], fmt="o", color="tab:red", alpha=0.5, label="Ramicane")
ax.errorbar(site_mean.index, site_mean["Stelasyn"], site_error["Stelasyn"], fmt="o", color="tab:olive", alpha=0.5, label="Stelasyn")
ax.errorbar(site_mean.index, site_mean["Zoniferol"], site_error["Zoniferol"], fmt="o", color="tab:purple", alpha=0.5, label="Zoniferol")
ax.errorbar(site_mean.index, site_mean["Placebo"], site_error["Placebo"], fmt="o", color="tab:cyan", alpha=0.5, label="Placebo")

ax.set_xlim(0, 50)
ax.set_ylim(0.00, 4.00)
ax.set_xlabel ("Treatment Duration (Days)")
ax.set_ylabel ("Metastatic Sites")
plt.legend(loc="best", fontsize="small", fancybox=True)
plt.grid()
plt.show()
# Save the Figure

# Show the Figure


<IPython.core.display.Javascript object>

![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [25]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)
mice_grouped = combined_mouse_df.groupby(["Drug", "Timepoint"])
# Convert to DataFrame
mice_count = mice_grouped.count()
mice_count = mice_count.reset_index()
mice_count = mice_count[["Drug", "Timepoint", "Mouse ID"]]
mice_count = mice_count.rename(columns={"Mouse ID": "Mouse Count"})
# Preview DataFrame
mice_count.head()

Unnamed: 0,Drug,Timepoint,Mouse Count
0,Capomulin,0,25
1,Capomulin,5,25
2,Capomulin,10,25
3,Capomulin,15,24
4,Capomulin,20,23


In [26]:
# Minor Data Munging to Re-Format the Data Frames
mouse_count = mice_count.pivot(index="Timepoint", columns="Drug", values="Mouse Count")
# Preview the Data Frame
mouse_count.head()

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17


In [27]:
# Generate the Plot (Accounting for percentages)
fig, ax = plt.subplots()
fig.suptitle("Survival Rates", fontsize=10, fontweight="bold")
ax.scatter(mouse_count.index, (mouse_count["Capomulin"]/25*100), marker="o", color="tab:blue", alpha=0.5, label="Capomulin")
ax.scatter(mouse_count.index, (mouse_count["Ceftamin"]/25*100), marker="o", color="tab:brown", alpha=0.5, label="Ceftamin")
ax.scatter(mouse_count.index, (mouse_count["Infubinol"]/25*100), marker="o", color="tab:orange", alpha=0.5, label="Infubinol")
ax.scatter(mouse_count.index, (mouse_count["Ketapril"]/25*100), marker="o", color="tab:pink", alpha=0.5, label="Ketapril")
ax.scatter(mouse_count.index, (mouse_count["Naftisol"]/25*100), marker="o", color="tab:green", alpha=0.5, label="Naftisol")
ax.scatter(mouse_count.index, (mouse_count["Propriva"]/26*100), marker="o", color="tab:gray", alpha=0.5, label="Propriva")
ax.scatter(mouse_count.index, (mouse_count["Ramicane"]/25*100), marker="o", color="tab:red", alpha=0.5, label="Ramicane")
ax.scatter(mouse_count.index, (mouse_count["Stelasyn"]/26*100), marker="o", color="tab:olive", alpha=0.5, label="Stelasyn")
ax.scatter(mouse_count.index, (mouse_count["Zoniferol"]/25*100), marker="o", color="tab:purple", alpha=0.5, label="Zoniferol")
ax.scatter(mouse_count.index, (mouse_count["Placebo"]/25*100), marker="o", color="tab:cyan", alpha=0.5, label="Placebo")
ax.set_xlim(0,50)
ax.set_ylim(0,105)
ax.set_xlabel("Time (Days)")
ax.set_ylabel("Survival Rate %")
plt.legend(loc="best", fontsize="small", fancybox=True)
plt.grid()
# Save the Figure

# Show the Figure
plt.show()

<IPython.core.display.Javascript object>

![Metastatic Spread During Treatment](../Images/survival.png)

## Summary Bar Graph

In [32]:
# Calculate the percent changes for each drug
Capomulin= tumor_mean["Capomulin"]
Capomulin_change=(Capomulin[45]-Capomulin[0])/Capomulin[0]*100
Ceftamin=tumor_mean["Ceftamin"]
Ceftamin_change=(Ceftamin[45]-Ceftamin[0])/Ceftamin[0]*100
Infubinol=tumor_mean["Infubinol"]
Infubinol_change=(Infubinol[45]-Infubinol[0])/Infubinol[0]*100
Ketapril=tumor_mean["Ketapril"]
Ketapril_change=(Ketapril[45]-Ketapril[0])/Ketapril[0]*100
Naftisol=tumor_mean["Naftisol"]
Naftisol_change=(Naftisol[45]-Naftisol[0])/Naftisol[0]*100
Propriva=tumor_mean["Propriva"]
Propriva_change=(Propriva[45]-Propriva[0])/Propriva[0]*100
Ramicane=tumor_mean["Ramicane"]
Ramicane_change=(Ramicane[45]-Ramicane[0])/Ramicane[0]*100
Stelasyn=tumor_mean["Stelasyn"]
Stelasyn_change=(Stelasyn[45]-Stelasyn[0])/Stelasyn[0]*100
Zoniferol=tumor_mean["Zoniferol"]
Zoniferol_change=(Zoniferol[45]-Zoniferol[0])/Zoniferol[0]*100
Placebo=tumor_mean["Placebo"]
Placebo_change=(Placebo[45]-Placebo[0])/Placebo[0]*100
# Display the data to confirm
drugpos= [Ceftamin_change, Infubinol_change, Ketapril_change, Naftisol_change, Propriva_change,  Stelasyn_change, Zoniferol_change, Placebo_change]
drugpos
drugneg=[Capomulin_change, Ramicane_change]
drugneg

[-19.475302667894173, -22.320900462766673]

In [35]:
# Store all Relevant Percent Changes into a Tuple
drugspos =("Ceftamin","Infubinol","Ketapril","Naftisol","Propriva", "Stelasyn","Zoniferol","Placebo")
drugsneg=("Capomulin", "Ramicane")
drugs = ("Capomulin","Ceftamin","Infubinol","Ketapril","Naftisol","Propriva","Ramicane","Stelasyn","Zoniferol","Placebo")
drugcount = 10
xspace= np.arange(drugcount)
width = 1
fig, ax = plt.subplots()
bars1 = ax.bar(xspace, drugpos, width, label=drugspos, color = "tab:olive")
bars2 = ax.bar(0, drugneg, width, label=drugsneg, color = "tab:red")
ax.set_ylabel("Tumor Volume Change %")
ax.set_title("Tumor Change Over 45 Day Treatment")
ax.set_xticks(ind + width /2)
ax.set_xticklabels(("Capomulin","Ceftamin","Infubinol","Ketapril","Naftisol","Propriva","Ramicane","Stelasyn","Zoniferol","Placebo"))
ax.set_ylim(-25, 65)

def autolabel (bars):
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., 1.01*height, "%" %int(height), ha="center", va="bottom")
autolabel(bars1)
autolabel(bars2)
plt.grid()
plt.show()
# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure


<IPython.core.display.Javascript object>

ValueError: shape mismatch: objects cannot be broadcast to a single shape

![Metastatic Spread During Treatment](../Images/change.png)