In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Input CSV locations, relative to the notebook's working directory
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Load the mouse-to-drug assignments and the clinical trial measurements
mouseDrugDF = pd.read_csv(mouse_drug_data_to_load)
trialDF = pd.read_csv(clinical_trial_data_to_load)

# Any Mouse ID listed more than once in the assignment table is ambiguous
# (the original note cites "g989", which appears under two different drugs).
# keep=False flags every occurrence, so all rows of such an ID are discarded.
mouseDrugDF = mouseDrugDF.loc[~mouseDrugDF.duplicated(subset='Mouse ID', keep=False)]

# Left join: every trial row is kept; rows whose Mouse ID was discarded above
# simply get NaN in the columns contributed by mouseDrugDF.
mouseTrialsDF = trialDF.merge(mouseDrugDF, on="Mouse ID", how="left")

# Preview the combined table
mouseTrialsDF.head()

## Tumor Response to Treatment

In [None]:
# Mean tumor volume for every (Drug, Timepoint) pair, with the group keys
# moved back into ordinary columns so the frame can be pivoted later.
tumorVolumeByDrug = (
    mouseTrialsDF
    .groupby(["Drug", "Timepoint"])['Tumor Volume (mm3)']
    .mean()
    .reset_index()
)

# Preview the first rows
tumorVolumeByDrug.head()

In [None]:
# Standard error of the mean tumor volume for every (Drug, Timepoint) pair,
# flattened into a regular DataFrame (group keys become columns).
stdErrorTumorVol = (
    mouseTrialsDF
    .groupby(["Drug", "Timepoint"])['Tumor Volume (mm3)']
    .sem()
    .to_frame()
    .reset_index()
)

# Preview the first rows
stdErrorTumorVol.head()

In [None]:
# Reshape long -> wide: one row per Timepoint, one column per Drug.
# The same layout is used for the means and for their standard errors so the
# two frames can be indexed with the same drug name when plotting.
tumorVolVsTime = tumorVolumeByDrug.pivot(
    index='Timepoint',
    columns='Drug',
    values='Tumor Volume (mm3)',
)
stdErrVsTime = stdErrorTumorVol.pivot(
    index='Timepoint',
    columns='Drug',
    values='Tumor Volume (mm3)',
)

# Preview the reshaped standard errors
stdErrVsTime.head()


In [None]:
# Drugs compared in every figure, and the matplotlib format string
# (color + marker + line style) used for each of them.
drugList = ["Capomulin", "Infubinol", "Ketapril", "Placebo"]
plotStyleDict = {"Capomulin":'ro:', 'Infubinol':'b^:', 'Ketapril':'gs:', 'Placebo': 'kd:'}

# Mean tumor volume over time, with the standard error as vertical error bars
fig1, ax1 = plt.subplots()
for drug in drugList:
    ax1.errorbar(
        tumorVolVsTime.index,
        tumorVolVsTime[drug],
        yerr=stdErrVsTime[drug],
        fmt=plotStyleDict[drug],
        label=drug,  # without a label the legend below has nothing to show
    )
ax1.legend()
ax1.grid(True)
ax1.set_title('Tumor Response to Treatment')
ax1.set_xlabel('Time (Days)')
ax1.set_ylabel('Tumor Volume (mm3)')

# Save before showing: the figure is written to disk first, then rendered
fig1.savefig("TumorResponseToTreatment.png")
plt.show()


In [None]:
# Show the Figure
# NOTE(review): the plotting cell above already ends with plt.show(), so this
# call likely has no open figure left to render — confirm and consider
# removing this cell.
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [None]:
# Mean number of metastatic sites for every (Drug, Timepoint) pair, with the
# group keys restored as regular columns.
metastaticSitesByDrug = (
    mouseTrialsDF
    .groupby(["Drug", "Timepoint"])['Metastatic Sites']
    .mean()
    .reset_index()
)

# Preview the first rows
metastaticSitesByDrug.head()

In [None]:
# Standard error of the mean metastatic-site count for every
# (Drug, Timepoint) pair, flattened into a regular DataFrame.
stdErrorMetastaticSites = (
    mouseTrialsDF
    .groupby(["Drug", "Timepoint"])['Metastatic Sites']
    .sem()
    .to_frame()
    .reset_index()
)

# Preview the first rows
stdErrorMetastaticSites.head()


In [None]:
# Reshape long -> wide: one row per Timepoint, one column per Drug
metastaticSitesbyTime = metastaticSitesByDrug.pivot(index='Timepoint', columns="Drug", values="Metastatic Sites")

# The standard errors are reshaped under their own name (the plotting cell
# reads `stdErrorMetastaticSites`). After the first run the frame no longer has
# a "Drug" column, so an unguarded second run of this cell would raise a
# KeyError. Only pivot while the frame is still in long form, which makes the
# cell safe to re-run.
if "Drug" in stdErrorMetastaticSites.columns:
    stdErrorMetastaticSites = stdErrorMetastaticSites.pivot(index='Timepoint', columns="Drug", values="Metastatic Sites")

# Preview that the reformatting worked
metastaticSitesbyTime

In [None]:
# Mean metastatic-site count over time, with the standard error as vertical
# error bars, for each drug in drugList.
fig2, ax2 = plt.subplots()
for drug in drugList:
    ax2.errorbar(
        metastaticSitesbyTime.index,
        metastaticSitesbyTime[drug],
        yerr=stdErrorMetastaticSites[drug],
        fmt=plotStyleDict[drug],
        label=drug,  # without a label the legend below has nothing to show
    )

ax2.legend()
ax2.grid(True)
ax2.set_title('Metastatic Spread during Treatment')
ax2.set_xlabel('Time (Days)')
ax2.set_ylabel('Metastatic Sites')

# Save the figure before showing it
fig2.savefig("MetastaticSpreadDuringTreatment.png")

# Show the figure. The original had a bare `plt.show` (no parentheses), which
# only references the function and never calls it.
plt.show()

![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [None]:
# Number of recorded mice for every (Drug, Timepoint) pair. The count is taken
# on 'Mouse ID', and the resulting column is named "Mouse Count" when the
# group keys are turned back into regular columns.
mouseSurvival = (
    mouseTrialsDF
    .groupby(["Drug", "Timepoint"])['Mouse ID']
    .count()
    .reset_index(name="Mouse Count")
)

# Preview the first rows
mouseSurvival.head()

In [None]:
# Reshape long -> wide: one row per Timepoint, one column per Drug, holding
# the mouse count for that drug at that timepoint.
mouseSurvivalByTime = mouseSurvival.pivot(
    index="Timepoint",
    columns="Drug",
    values="Mouse Count",
)

# Display the reshaped table
mouseSurvivalByTime

In [None]:
# Survival rate (%) = mice still recorded at each timepoint relative to the
# number recorded for the same drug at the first timepoint. Dividing by each
# drug's own starting count (instead of a hard-coded 25) keeps the rate correct
# for cohorts of any size; for a drug that starts with 25 mice the result is
# unchanged. The pivot index is sorted, so row 0 is the earliest timepoint.
mouseSurvivalRate = 100 * mouseSurvivalByTime / mouseSurvivalByTime.iloc[0]

fig3, ax3 = plt.subplots()
for drug in drugList:
    ax3.plot(
        mouseSurvivalRate.index,
        mouseSurvivalRate[drug],
        plotStyleDict[drug],
        label=drug,  # without a label the legend below has nothing to show
    )
ax3.grid(True)
ax3.legend()
ax3.set_title("Survival During Treatment")
ax3.set_xlabel("Time (Days)")
ax3.set_ylabel("Survival Rate (%)")

# Save the figure before showing it
fig3.savefig("SurvivalDuringTreatment.png")
plt.show()


![Survival During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Percent change in mean tumor volume per drug between the first and the last
# recorded timepoint. iloc[-1] selects the final row whatever the number of
# timepoints is (the original hard-coded row 9, i.e. exactly ten timepoints).
percentChangeStudyPeriod = (tumorVolVsTime.iloc[-1] - tumorVolVsTime.iloc[0]) / tumorVolVsTime.iloc[0] * 100

# Display the data to confirm
print(percentChangeStudyPeriod)

In [None]:
# One row per drug with its percent change. The column is named explicitly
# here instead of renaming the default column 0 afterwards, which would
# silently do nothing if the Series ever carried a name.
percentChange = percentChangeStudyPeriod.to_frame(name="Percent Change")

# A drug "passes" when the mean tumor volume shrank over the study period.
# Compare the column itself (a boolean Series) rather than assigning a whole
# one-column DataFrame to a single column.
percentChange["Pass"] = percentChange["Percent Change"] < 0

In [None]:
# Bar chart of the percent tumor-volume change per drug: green bars for drugs
# that pass (volume decreased), red bars otherwise, each annotated with its
# value.
fig4, ax4 = plt.subplots()

for barPosition, drug in enumerate(drugList):
    change = percentChange.loc[drug, "Percent Change"]

    # Pick bar color plus label color/placement depending on the outcome:
    # passing bars carry a white label below zero, the others a black label
    # just above zero.
    if percentChange.loc[drug, "Pass"]:
        barColor, textColor, textX, textY = "g", "white", barPosition - .25, -5
    else:
        barColor, textColor, textX, textY = "r", "black", barPosition - .2, 2

    ax4.bar(drug, change, width=.9, color=barColor)
    ax4.text(textX, textY, '{:.2f}%'.format(change), color=textColor)

ax4.set_yticks((-20, 0, 20, 40, 60))
ax4.yaxis.grid()
ax4.set_title("Mean Tumor Change over 45 Day Trial")
ax4.set_ylabel("% Change Tumor Volume")
ax4.set_ylim(top=65)

fig4.savefig("MeanTumorChange.png")

plt.show()


![Mean Tumor Change over 45 Day Trial](../Images/change.png)

In [3]:
#Pymaceuticals Observed Trends
# Capomulin was legitimately successful at treating the tumors in this population of mice. It was the only treatment of those analyzed to actually reduce the total tumor volume over the course of treatment. Additionally, survival rate was significantly higher compared to the other treatments. It should be noted that metastatic sites still increased over time, but at a rate slower than the other treatments.
# Some of the other treatments were arguably less effective than no treatment (Placebo). Ketapril, in particular, was less effective at treating tumor volume than the placebo. The growth of metastatic sites in placebo-treated and Ketapril-treated mice was indistinguishable, given how their error bars overlapped. This leads one to believe that Ketapril is not an effective method of treatment.
# Infubinol was somewhat of a mixed bag as far as its effectiveness. On the one hand, its tumor volume trend was similar to that of Ketapril and the placebo (though it did fare slightly better), and it was the deadliest drug, with the lowest survival rate. On the other hand, it led to fewer metastatic sites than both Ketapril and the placebo.