In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import statistics

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Input CSV locations (relative to the notebook's working directory; edit as needed)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Load each CSV into its own DataFrame
mouse_drug_df = pd.read_csv(mouse_drug_data_to_load, low_memory=False)
clinical_trial_df = pd.read_csv(clinical_trial_data_to_load, low_memory=False)

# Left-join the drug assignment onto every clinical-trial row, matching on "Mouse ID".
# NOTE(review): a "Mouse ID" that appears more than once in the drug table would
# duplicate that mouse's trial rows in the result - TODO confirm the key is unique.
combined_drug_df = clinical_trial_df.merge(
    mouse_drug_df,
    on="Mouse ID",
    how="left",
)

# Preview the first rows of the merged table
combined_drug_df.head()


## Tumor Response to Treatment

In [None]:
# Group every observation by its (Drug, Timepoint) pair; the grouping is reused below
grouped_drug_df = combined_drug_df.groupby(["Drug", "Timepoint"])

# Mean tumor volume per group (despite the name, this is a Series with a
# (Drug, Timepoint) MultiIndex)
mean_tumor_vol_df = grouped_drug_df["Tumor Volume (mm3)"].mean()

# Flatten the MultiIndex into ordinary "Drug" and "Timepoint" columns
tumor_scatter_df = mean_tumor_vol_df.reset_index()

# Preview the long-format table of means
tumor_scatter_df.head()

In [None]:
# Standard error of the mean (SEM) of tumor volume for each (Drug, Timepoint) group,
# computed from the same grouping used for the means
tumor_scatter_error_df = grouped_drug_df["Tumor Volume (mm3)"].sem()

# Flatten the (Drug, Timepoint) MultiIndex into columns; the SEM values stay in
# a column named "Tumor Volume (mm3)"
tumor_scatter_error_reset_df = tumor_scatter_error_df.reset_index()

# Preview the long-format table of standard errors
tumor_scatter_error_reset_df.head()


In [None]:
# Reshape the long table of means to wide form: one row per Timepoint,
# one column per Drug, cells holding the mean tumor volume
reformatted_tumor_scatter_df = tumor_scatter_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Tumor Volume (mm3)",
)

# Display the reshaped table to confirm the layout
reformatted_tumor_scatter_df

In [None]:
# Generate the Plot (with Error Bars)

# Reshape the standard errors to Timepoint x Drug so they share the layout of the
# pivoted means. This replaces boolean-masked Series whose row labels had nothing
# to do with the x values and only lined up by position.
reformatted_tumor_error_df = tumor_scatter_error_reset_df.pivot(
    index="Timepoint", columns="Drug", values="Tumor Volume (mm3)"
)

# x positions come from the data's own timepoints rather than a hand-typed list
time = list(reformatted_tumor_scatter_df.index)

# (drug, marker, color) for each series drawn on the chart
tumor_plot_styles = [
    ("Capomulin", "o", "r"),
    ("Infubinol", "^", "b"),
    ("Ketapril", "s", "g"),
    ("Placebo", "d", "k"),
]

for drug, marker, color in tumor_plot_styles:
    plt.errorbar(
        time,
        reformatted_tumor_scatter_df[drug],
        # .loc[time, drug] pins the errors to the plotted timepoints; .values hands
        # matplotlib a plain array so no pandas index lookup is involved
        yerr=reformatted_tumor_error_df.loc[time, drug].values,
        label=drug,
        marker=marker,
        color=color,
        linestyle="dashed",
    )

plt.legend()
plt.title("Tumor Response to Treatment ")
plt.xlabel("Time (Days)")
plt.ylabel("Tumor Volume (mm3)")
plt.grid()

# Save the Figure (the "Images" directory must already exist)
plt.savefig("Images/tumor_response_to_treatment.png")

# Show the Figure
plt.show()

## Metastatic Response to Treatment

In [None]:
# Group every observation by its (Drug, Timepoint) pair for the metastatic analysis
grouped_metastatic_df = combined_drug_df.groupby(["Drug", "Timepoint"])

# Mean number of metastatic sites per group (a Series with a
# (Drug, Timepoint) MultiIndex, despite the "_df" suffix)
mean_metastatic_df = grouped_metastatic_df["Metastatic Sites"].mean()

# Flatten the MultiIndex into ordinary "Drug" and "Timepoint" columns
metastatic_scatter_df = mean_metastatic_df.reset_index()

# Preview the long-format table of means
metastatic_scatter_df.head()

In [None]:
# Store the Standard Error of Metastatic Sites Grouped by Drug and Timepoint.
# Uses this section's own grouping (grouped_metastatic_df) rather than the
# grouping object created in the tumor-volume section, so the metastatic
# analysis does not silently depend on an unrelated cell.
metastatic_scatter_error_df = grouped_metastatic_df["Metastatic Sites"].sem()

# Convert to DataFrame: flatten the (Drug, Timepoint) MultiIndex into columns;
# the SEM values stay in a column named "Metastatic Sites"
metastatic_scatter_error_reset_df = pd.DataFrame(metastatic_scatter_error_df).reset_index()

# Preview DataFrame
metastatic_scatter_error_reset_df.head()

In [None]:
# Reshape the long table of means to wide form: one row per Timepoint,
# one column per Drug, cells holding the mean metastatic-site count
reformatted_metastatic_scatter_df = metastatic_scatter_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Metastatic Sites",
)

# Display the reshaped table to confirm the layout
reformatted_metastatic_scatter_df

In [None]:
# Generate the Plot (with Error Bars)

# Reshape the standard errors to Timepoint x Drug so they share the layout of the
# pivoted means. This replaces boolean-masked Series whose row labels had nothing
# to do with the x values and only lined up by position.
reformatted_metastatic_error_df = metastatic_scatter_error_reset_df.pivot(
    index="Timepoint", columns="Drug", values="Metastatic Sites"
)

# x positions come from the data's own timepoints rather than a hand-typed list
time = list(reformatted_metastatic_scatter_df.index)

# (drug, marker, color) for each series drawn on the chart
metastatic_plot_styles = [
    ("Capomulin", "o", "r"),
    ("Infubinol", "^", "b"),
    ("Ketapril", "s", "g"),
    ("Placebo", "d", "k"),
]

for drug, marker, color in metastatic_plot_styles:
    plt.errorbar(
        time,
        reformatted_metastatic_scatter_df[drug],
        # .loc[time, drug] pins the errors to the plotted timepoints; .values hands
        # matplotlib a plain array so no pandas index lookup is involved
        yerr=reformatted_metastatic_error_df.loc[time, drug].values,
        label=drug,
        marker=marker,
        color=color,
        linestyle="dashed",
    )

plt.legend()
plt.title("Metastatic Response To Treatment")
plt.xlabel("Time (Days)")
plt.ylabel("Metastatic Sites")
plt.grid()

# Save the Figure (the "Images" directory must already exist)
plt.savefig("Images/metastatic_response_to_treatment.png")

# Show the Figure
plt.show()

## Survival Rates

In [None]:
# Group every observation by its (Drug, Timepoint) pair for the survival analysis
grouped_mouse_df = combined_drug_df.groupby(["Drug", "Timepoint"])

# Number of non-null "Mouse ID" entries in each group, i.e. how many
# observations were recorded for that drug at that timepoint
count_mouse_df = grouped_mouse_df["Mouse ID"].count()

# Flatten the (Drug, Timepoint) MultiIndex into columns; the counts stay in
# a column named "Mouse ID"
mouse_scatter_df = count_mouse_df.reset_index()

# Preview the long-format table of counts
mouse_scatter_df.head()

In [None]:
# Reshape the long table of counts to wide form: one row per Timepoint,
# one column per Drug, cells holding the mouse count
reformatted_mouse_scatter_df = mouse_scatter_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Mouse ID",
)

# Display the reshaped table to confirm the layout
reformatted_mouse_scatter_df


In [None]:
# Generate the Plot (Accounting for percentages)

# x positions come from the data's own timepoints rather than a hand-typed list
Time = list(reformatted_mouse_scatter_df.index)

# Survival is measured against each drug's own head-count at the first timepoint
# (first row of the pivot), instead of assuming every cohort started with 25 mice.
starting_mouse_counts = reformatted_mouse_scatter_df.iloc[0]
survival_rate_df = (reformatted_mouse_scatter_df / starting_mouse_counts) * 100

# (drug, marker, color) for each series drawn on the chart
survival_plot_styles = [
    ("Capomulin", "o", "r"),
    ("Infubinol", "^", "b"),
    ("Ketapril", "s", "g"),
    ("Placebo", "d", "k"),
]

for drug, marker, color in survival_plot_styles:
    plt.plot(
        Time,
        survival_rate_df[drug],
        label=drug,
        marker=marker,
        color=color,
        linestyle="dashed",
    )

plt.legend()
plt.title("Survival Rates During Treatment")
plt.xlabel("Time (Days)")
plt.ylabel("Survival Rate (%) ")
plt.grid()

# Save the Figure (the "Images" directory must already exist)
plt.savefig("Images/survival_rates_during_treatment.png")

# Show the Figure
plt.show()

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Timepoint (index label of the pivoted means) treated as the end of the study;
# the summary chart is titled for this same 45-day window.
final_timepoint = 45

# Baseline: each drug's mean tumor volume at the first timepoint, read from the
# data instead of being hard-coded as a literal.
tumor_vol = reformatted_tumor_scatter_df.iloc[0]

# Mean tumor volume per drug at the final timepoint
final_tumor_vol = reformatted_tumor_scatter_df.loc[final_timepoint, :]

# Relative change from baseline, in percent, indexed by drug name
percent_change = ((final_tumor_vol - tumor_vol) / tumor_vol) * 100

# Display the data to confirm
percent_change

In [None]:
# Orient widths. Add labels, tick marks, etc.
fig, summary = plt.subplots()

# Drug names in bar order; reused for the tick labels below
x_axis_labels = ["Capomulin", "Infubinol", "Ketapril", "Placebo"]

# Capomulin is drawn on its own (green) at x=0; the other three share one
# red bar container at x=1..3.
y_axis = [percent_change["Infubinol"], percent_change["Ketapril"], percent_change["Placebo"]]
x_axis_negative = [0]
x_axis_positive = [1, 2, 3]

x_axis_negative_bar = summary.bar(x_axis_negative, percent_change["Capomulin"], color="g", align="center", width=0.5)
x_axis_positive_bar = summary.bar(x_axis_positive, y_axis, color="r", align="center", width=0.5)

plt.setp(summary, xticks=[0, 1, 2, 3], xticklabels=x_axis_labels, yticks=[-20, 0, 20, 40, 60])
summary.set_ylabel("Tumor Volume Change (%)")
summary.set_title("Tumor Change Over 45 Days")
summary.grid()


# Use functions to label the percentages of changes
def autolabel(labels):
    """Write each bar's height, as a whole-number percentage, inside the bar.

    Parameters
    ----------
    labels : iterable of bar patches
        The bars returned by ``summary.bar``; each must provide ``get_height``,
        ``get_x`` and ``get_width``.

    The text is centred horizontally on the bar and anchored at half the bar's
    height. ``int()`` truncates toward zero, so the label drops the fractional
    part rather than rounding. Draws on the module-level ``summary`` axes.
    """
    for label in labels:
        height = label.get_height()
        summary.text(
            label.get_x() + label.get_width() / 2,
            .5 * height,
            "%d%%" % int(height),
            ha='center', va='top', color="black",
        )


# Call functions to implement the function calls
autolabel(x_axis_negative_bar)
autolabel(x_axis_positive_bar)
fig.tight_layout()

# Save the Figure (the "Images" directory must already exist)
fig.savefig("Images/summary_bar_chart.png")

# Show the Figure. plt.show() matches the other plotting cells; Figure.show()
# is meant for GUI backends and only emits a warning under the inline backend.
plt.show()