In [11]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
# Forward slashes are used so the relative paths resolve on Windows, macOS and
# Linux alike; a backslash is only a path separator on Windows and also starts
# an escape sequence inside a normal Python string literal.
file_mouse = "data/mouse_drug_data.csv"
file_trial = "data/clinicaltrial_data.csv"

In [12]:
# Load the clinical-trial measurements first, then the mouse-to-drug table
trial_df = pd.read_csv(file_trial)
drug_df = pd.read_csv(file_mouse)

# Outer-join the two tables on their shared 'Mouse ID' column so that rows
# present in only one of the files are still kept
merge_df = trial_df.merge(drug_df, how='outer', on='Mouse ID')

# Show the first rows of the combined table
merge_df.head()

FileNotFoundError: [Errno 2] File b'data\\clinicaltrial_data.csv' does not exist: b'data\\clinicaltrial_data.csv'

In [None]:
# Mean tumor volume for every (Drug, Timepoint) pair
mean_tumor_gb = merge_df.groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)'].mean()
# Turn the group keys back into ordinary columns. The value column keeps the
# name 'Tumor Volume (mm3)', which is the name the later pivot of
# mean_tumor_df selects, so it must not be renamed here.
mean_tumor_df = mean_tumor_gb.reset_index()
# Preview DataFrame
mean_tumor_df.head()

In [None]:
# Standard error of the mean tumor volume for each (Drug, Timepoint) group
sterr_gb = (
    merge_df
    .groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
    .sem()
)
# Move Drug and Timepoint out of the index and into regular columns
sterr_df = sterr_gb.reset_index()
# Preview DataFrame
sterr_df.head()

In [None]:
# Reshape both long tables to wide form: one row per timepoint and one column
# per drug, holding the tumor-volume statistic as the cell value
mean_tumor_pdf = mean_tumor_df.pivot(
    index='Timepoint',
    columns='Drug',
    values='Tumor Volume (mm3)',
)
sterr_pdf = sterr_df.pivot(
    index='Timepoint',
    columns='Drug',
    values='Tumor Volume (mm3)',
)
# Check the reshaped standard-error table
sterr_pdf.head()

In [None]:

# Generate the Plot (with Error Bars): mean tumor volume over time, one series
# per drug, with the standard error drawn as the error bar.

# Set the font size before any text is created so it also applies to the title
# and axis labels; matplotlib text takes its default size from rcParams when it
# is created, so setting it afterwards only affected later runs of the cell.
plt.rcParams["font.size"] = 10
plt.figure(figsize=(15,6))
plt.title("Tumor Response to Treatment")
plt.xlabel("Time (Days)")
plt.ylabel("Tumor Volume (mm3)")
plt.grid()

# The columns of the pivoted table are the drug names, so iterate over them
# instead of maintaining a hard-coded list.
drug_index = mean_tumor_pdf.columns

for drug in drug_index:
    plot_drug = plt.errorbar(mean_tumor_pdf.index, mean_tumor_pdf[drug], sterr_pdf[drug], fmt='^', linestyle='--', label=drug)

# Build the legend and write the image once, after every series has been drawn,
# and before plt.show() so the saved file contains the finished figure.
plt.legend(loc='best')
plt.savefig("DrugTumorResponse_vs_Timepoint.png")
plt.show()

In [None]:
# Group the merged data by drug and timepoint; the grouped object is kept in
# its own variable because the standard-error calculation uses it as well
metsite_gb = merge_df.groupby(['Drug','Timepoint'])
# Mean number of metastatic sites per group, as a one-column DataFrame.
# reset_index() is deliberately not applied: the assignment requires
# Drug/Timepoint to remain on the index.
metsitemean_df = metsite_gb['Metastatic Sites'].mean().to_frame()
# Preview DataFrame
metsitemean_df.head()

In [None]:
# Standard error of the metastatic-site count for each (Drug, Timepoint) group
metsite_sterr = metsite_gb['Metastatic Sites'].sem()
# Wrap the resulting Series in a one-column DataFrame (group keys stay on the index)
metsitesterr_df = metsite_sterr.to_frame()
# Preview DataFrame
metsitesterr_df.head()

In [None]:
# Reshape the standard-error table to wide form: flatten the (Drug, Timepoint)
# index into columns, then pivot to one row per timepoint and one column per drug
metsitesterr_pdf = (
    metsitesterr_df
    .reset_index()
    .pivot(index='Timepoint', columns='Drug', values='Metastatic Sites')
)
# Check the reshaped table
metsitesterr_pdf.head()

In [None]:
# Reshape the mean table to wide form: flatten the (Drug, Timepoint) index into
# columns, then pivot to one row per timepoint and one column per drug
metsitemean_pdf = (
    metsitemean_df
    .reset_index()
    .pivot(index='Timepoint', columns='Drug', values='Metastatic Sites')
)
# Check the reshaped table
metsitemean_pdf.head()

In [None]:
# Plot the mean number of metastatic sites over time, one series per drug, with
# the standard error drawn as the error bar.

# Set the font size before any text is created so it also applies to the title
# and axis labels; matplotlib text takes its default size from rcParams when it
# is created, so setting it afterwards only affected later runs of the cell.
plt.rcParams["font.size"] = 10
plt.figure(figsize=(15,6))
plt.title("Metastatic Spread During Treatment")
plt.xlabel("Treatment Duration (Days)")
plt.ylabel("Metastatic Sites")
plt.grid()

# The columns of the pivoted table are the drug names
drugmeta_index = metsitemean_pdf.columns

for drug in drugmeta_index:
    plot_drugmeta = plt.errorbar(metsitemean_pdf.index, metsitemean_pdf[drug], metsitesterr_pdf[drug], fmt='s', linestyle='--', label=drug)

# Build the legend and write the image once, after every series has been drawn,
# and before plt.show() so the saved file contains the finished figure.
plt.legend(loc='best')
plt.savefig("DrugMetaStaticSpread_vs_Timepoint.png")
plt.show()

In [None]:
# Count the non-null 'Mouse ID' entries in each (Drug, Timepoint) group
# (we can pass any metric; this column is just used to count rows)
countmice = (
    merge_df
    .groupby(['Drug','Timepoint'])['Mouse ID']
    .count()
)
# Move Drug and Timepoint out of the index and into regular columns
micecount_df = countmice.reset_index()
# Preview DataFrame
micecount_df.head()

In [None]:
# Reshape the counts to wide form: one row per timepoint, one column per drug,
# with the 'Mouse ID' count as the cell value
micecount_pdf = micecount_df.pivot(
    index='Timepoint',
    columns='Drug',
    values='Mouse ID',
)
# Check the reshaped table
micecount_pdf.head()

In [None]:
# Survival rate per drug: the mouse count at each timepoint as a percentage of
# the count in the first row of the pivoted table. Dividing the frame by its
# first row aligns on the drug columns, so each drug is scaled by its own
# starting count.
# NOTE(review): assumes the first row of micecount_pdf is the start of the
# trial (the pivot orders rows by Timepoint) - confirm against the data.
micerate_pdf = micecount_pdf / micecount_pdf.iloc[0] * 100
drugmicerate_index = micerate_pdf.columns

# Set the font size before any text is created so it also applies to the title
# and axis labels.
plt.rcParams["font.size"] = 10
plt.figure(figsize=(15,6))
plt.title("Survival During Treatment")
plt.xlabel("Treatment Duration (Days)")
plt.ylabel("Survival Rate (%)")
plt.grid()

for drug in drugmicerate_index:
    # Plot the percentage, matching the "Survival Rate (%)" axis label
    plot_drugsurvivalrate = plt.plot(micerate_pdf.index, micerate_pdf[drug], linestyle='-', marker='^', label=drug)

# Build the legend and write the image once, after every series has been drawn,
# and before plt.show() so the saved file contains the finished figure.
plt.legend(loc='best')
plt.savefig("DrugMiceSurvivalRate_vs_Timepoint.png")
plt.show()

In [None]:
# Percent change in mean tumor volume per drug, from the first row of the
# pivoted table to the last row. The rounding is applied to the final
# percentage; previously .round(2) was attached to the denominator only, so the
# reported values were never rounded.
meantumorpercentage_df = pd.DataFrame({
    "Mean Tumor Percentage": (
        (mean_tumor_pdf.iloc[-1] - mean_tumor_pdf.iloc[0]) / mean_tumor_pdf.iloc[0] * 100
    ).round(2)
})
meantumorpercentage_df.head()