In [None]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Silence library warnings so they do not clutter the rendered notebook
import warnings
warnings.filterwarnings(action='ignore')

# Locations of the two input CSV files (adjust if the data lives elsewhere)
mouse_data = "data/mouse_drug_data.csv"
clinical_data = "data/clinicaltrial_data.csv"

# Load the mouse/drug table and preview its first rows
mousedata_df = pd.read_csv(filepath_or_buffer=mouse_data)
mousedata_df.head()

In [None]:
# Load the clinical trial table and preview its first rows
cd_df = pd.read_csv(filepath_or_buffer=clinical_data)
cd_df.head()

In [None]:
# Join the clinical trial rows with the mouse/drug rows on their shared
# "Mouse ID" column, keeping only IDs that are present in both tables
merged_df = pd.merge(cd_df, mousedata_df, how="inner", on="Mouse ID")

# Preview the combined table
merged_df.head()

In [None]:
# Mean tumor volume for every (Drug, Timepoint) group
tumor_volume_groups = merged_df.groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
mean_tumor_volume = tumor_volume_groups.mean()

# Wrap the resulting Series in a single-column DataFrame
mean_tumor_volume_df = mean_tumor_volume.to_frame()

# Preview the first rows
mean_tumor_volume_df.head()

In [None]:
# Standard error of the mean tumor volume for every (Drug, Timepoint) group
tumor_volume_sem_groups = merged_df.groupby(['Drug', 'Timepoint'])['Tumor Volume (mm3)']
std_error_tumor_volume = tumor_volume_sem_groups.sem()

# Wrap the resulting Series in a single-column DataFrame
std_error_tumor_volume_df = std_error_tumor_volume.to_frame()

# Preview the first rows
std_error_tumor_volume_df.head()

In [None]:
# Minor Data Munging to Re-Format the Data Frames
# Pivot to one row per Timepoint and one column per Drug, holding the mean
# tumor volume (pivot_table aggregates with the mean by default).
# `values` is passed explicitly so that only the tumor-volume column is
# aggregated; without it pivot_table tries to average every remaining column
# of merged_df, including identifier columns that have no meaningful mean.
# NOTE(review): on recent pandas versions averaging a non-numeric column is
# expected to raise rather than be silently dropped - confirm against the
# installed version.
merged_df_mung = pd.pivot_table(
    merged_df,
    values='Tumor Volume (mm3)',
    index=['Timepoint'],
    columns=['Drug'],
)

# Preview that Reformatting worked
merged_df_mung

In [None]:
# For Capomulin, Infubinol, Ketapril and Placebo: pull the per-timepoint mean
# tumor volumes out of the pivoted table and the matching standard errors out
# of the grouped SEM series, flattening each to a plain Python list.

# Capomulin
capomulin = merged_df_mung.loc[:, ["Capomulin"]]
capomulin_list = capomulin.loc[:, 'Capomulin'].tolist()
capomulin_sem = std_error_tumor_volume.loc[["Capomulin"]]
capomulin_list_sem = capomulin_sem.loc['Capomulin'].tolist()

# Infubinol
infubinol = merged_df_mung.loc[:, ["Infubinol"]]
infubinol_list = infubinol.loc[:, 'Infubinol'].tolist()
infubinol_sem = std_error_tumor_volume.loc[["Infubinol"]]
infubinol_list_sem = infubinol_sem.loc['Infubinol'].tolist()

# Ketapril
ketapril = merged_df_mung.loc[:, ["Ketapril"]]
ketapril_list = ketapril.loc[:, 'Ketapril'].tolist()
ketapril_sem = std_error_tumor_volume.loc[["Ketapril"]]
ketapril_list_sem = ketapril_sem.loc['Ketapril'].tolist()

# Placebo
placebo = merged_df_mung.loc[:, ["Placebo"]]
placebo_list = placebo.loc[:, 'Placebo'].tolist()
placebo_sem = std_error_tumor_volume.loc[["Placebo"]]
placebo_list_sem = placebo_sem.loc['Placebo'].tolist()

In [None]:
# Generate the Plot (with Error Bars)
# x positions 0, 5, ..., 45: one per value in each drug's list
x_limit = 50
x_axis = np.arange(0, x_limit, 5)

# (means, standard errors, marker, color, legend label) for each plotted drug
tumor_series = [
    (capomulin_list, capomulin_list_sem, 'o', 'red', 'Capomulin'),
    (infubinol_list, infubinol_list_sem, '^', 'blue', 'Infubinol'),
    (ketapril_list, ketapril_list_sem, 's', 'green', 'Ketapril'),
    (placebo_list, placebo_list_sem, 'D', 'black', 'Placebo'),
]

# One errorbar call per drug draws the markers, the dotted connecting line and
# the error bars together, all in that drug's own color (previously every
# drug's error bars were drawn in red, and a solid line hid the dotted one).
for means, sems, marker, color, label in tumor_series:
    plt.errorbar(x_axis, means, yerr=sems, marker=marker, linestyle=':',
                 color=color, label=label)

plt.title("Tumor Response To Treatment")
plt.xlabel('Time in Days')
plt.ylabel('Tumor Volume (mm3)')
plt.legend()
plt.grid()

# Save the Figure (after legend/grid so they appear in the saved image)
plt.savefig('Images/tumor-response-to-treatment.png')

# Show the Figure
plt.show()

In [None]:
# Mean number of metastatic sites for every (Drug, Timepoint) group
metastatic_groups = merged_df.groupby(['Drug', 'Timepoint'])['Metastatic Sites']
mean_metastatic_sites = metastatic_groups.mean()

# Wrap the resulting Series in a single-column DataFrame
mean_metastatic_sites_df = mean_metastatic_sites.to_frame()

# Preview the first rows
mean_metastatic_sites_df.head()

In [None]:
# Standard error of the mean metastatic-site count for every
# (Drug, Timepoint) group
metastatic_sem_groups = merged_df.groupby(['Drug', 'Timepoint'])['Metastatic Sites']
metastatic_std_error = metastatic_sem_groups.sem()

# Wrap the resulting Series in a single-column DataFrame
metastatic_std_error_df = metastatic_std_error.to_frame()

# Preview the first rows
metastatic_std_error_df.head()

In [None]:
# Minor Data Munging to Re-Format the Data Frames
# Pivot to one row per Timepoint and one column per Drug, holding the mean
# number of metastatic sites (pivot_table aggregates with the mean by default).
# `values` is passed explicitly so that only the metastatic-sites column is
# aggregated; without it pivot_table tries to average every remaining column
# of merged_df, including identifier columns that have no meaningful mean.
# NOTE(review): on recent pandas versions averaging a non-numeric column is
# expected to raise rather than be silently dropped - confirm against the
# installed version.
metastatic_mung = pd.pivot_table(
    merged_df,
    values='Metastatic Sites',
    index=['Timepoint'],
    columns=['Drug'],
)

# Preview that Reformatting worked
metastatic_mung

In [None]:
# For Capomulin, Infubinol, Ketapril and Placebo: pull the per-timepoint mean
# metastatic-site counts out of the pivoted table and the matching standard
# errors out of the grouped SEM series, flattening each to a plain Python list.

# Capomulin
capomulin_metastatic_mung = metastatic_mung.loc[:, ["Capomulin"]]
capomulin_metastatic_mung_list = capomulin_metastatic_mung.loc[:, 'Capomulin'].tolist()
capomulin_metastatic_mung_sem = metastatic_std_error.loc[['Capomulin']]
capomulin_metastatic_mung_sem_list = capomulin_metastatic_mung_sem.loc['Capomulin'].tolist()

# Infubinol
infubinol_metastatic_mung = metastatic_mung.loc[:, ["Infubinol"]]
infubinol_metastatic_mung_list = infubinol_metastatic_mung.loc[:, 'Infubinol'].tolist()
infubinol_metastatic_mung_sem = metastatic_std_error.loc[['Infubinol']]
infubinol_metastatic_mung_sem_list = infubinol_metastatic_mung_sem.loc['Infubinol'].tolist()

# Ketapril
ketapril_metastatic_mung = metastatic_mung.loc[:, ["Ketapril"]]
ketapril_metastatic_mung_list = ketapril_metastatic_mung.loc[:, 'Ketapril'].tolist()
ketapril_metastatic_mung_sem = metastatic_std_error.loc[['Ketapril']]
ketapril_metastatic_mung_sem_list = ketapril_metastatic_mung_sem.loc['Ketapril'].tolist()

# Placebo
placebo_metastatic_mung = metastatic_mung.loc[:, ["Placebo"]]
placebo_metastatic_mung_list = placebo_metastatic_mung.loc[:, 'Placebo'].tolist()
placebo_metastatic_mung_sem = metastatic_std_error.loc[['Placebo']]
placebo_metastatic_mung_sem_list = placebo_metastatic_mung_sem.loc['Placebo'].tolist()

In [None]:
# Generate the Plot (with Error Bars)

# x positions 0, 5, ..., 45: one per value in each drug's list
x_limit = 50
x_axis = np.arange(0, x_limit, 5)

# (means, standard errors, marker, color, legend label) for each plotted drug
metastatic_series = [
    (capomulin_metastatic_mung_list, capomulin_metastatic_mung_sem_list, 'o', 'red', 'Capomulin'),
    (infubinol_metastatic_mung_list, infubinol_metastatic_mung_sem_list, '^', 'blue', 'Infubinol'),
    (ketapril_metastatic_mung_list, ketapril_metastatic_mung_sem_list, 's', 'green', 'Ketapril'),
    (placebo_metastatic_mung_list, placebo_metastatic_mung_sem_list, 'D', 'black', 'Placebo'),
]

# One errorbar call per drug draws the markers, the dotted connecting line and
# the error bars together in that drug's color.
for means, sems, marker, color, label in metastatic_series:
    plt.errorbar(x_axis, means, yerr=sems, marker=marker, linestyle=':',
                 color=color, label=label)

plt.title("Metastatic Spread During Treatment")
plt.xlabel('Time in Days')
# The plotted values are mean metastatic-site counts, not tumor volumes
plt.ylabel('Met. Sites')
plt.grid()
plt.legend()

# Save the Figure (after grid/legend so they appear in the saved image)
plt.savefig('Images/metastatic-spread-during-treatment.png')

# Show the Figure
plt.show()


In [None]:
# Count of "Mouse ID" entries in every (Drug, Timepoint) group
mouse_id_groups = merged_df.groupby(['Drug', 'Timepoint'])['Mouse ID']
mice_count = mouse_id_groups.count()

# Wrap the resulting Series in a single-column DataFrame
mice_count_df = mice_count.to_frame()

# Preview the first rows
mice_count_df.head()


In [None]:
# Reshape the grouped counts to one row per Timepoint and one column per Drug
# (pivot_table's default mean aggregation is applied to each single count)
micecountmung = mice_count_df.pivot_table(index=['Timepoint'], columns=['Drug'])

# Drop the outer 'Mouse ID' column level and preview the result
mice_count_mung = micecountmung['Mouse ID']
mice_count_mung

In [None]:
# For Capomulin, Infubinol, Ketapril and Placebo: pull the per-timepoint mouse
# counts out of the pivoted table and flatten each to a plain Python list.

# Capomulin
capomulin_micecount_mung = mice_count_mung.loc[:, ["Capomulin"]]
capomulin_micecount_mung_list = capomulin_micecount_mung.loc[:, 'Capomulin'].tolist()

# Infubinol
infubinol_micecount_mung = mice_count_mung.loc[:, ["Infubinol"]]
infubinol_micecount_mung_list = infubinol_micecount_mung.loc[:, 'Infubinol'].tolist()

# Ketapril
ketapril_micecount_mung = mice_count_mung.loc[:, ["Ketapril"]]
ketapril_micecount_mung_list = ketapril_micecount_mung.loc[:, 'Ketapril'].tolist()

# Placebo
placebo_micecount_mung = mice_count_mung.loc[:, ["Placebo"]]
placebo_micecount_mung_list = placebo_micecount_mung.loc[:, 'Placebo'].tolist()



In [None]:
# Generate the Plot (Accounting for percentages)

# x positions 0, 5, ..., 45: one per value in each drug's list
x_limit = 50
x_axis = np.arange(0, x_limit, 5)

# (mouse counts per timepoint, marker, color, legend label) for each drug
survival_series = [
    (capomulin_micecount_mung_list, 'o', 'red', 'Capomulin'),
    (infubinol_micecount_mung_list, '^', 'blue', 'Infubinol'),
    (ketapril_micecount_mung_list, 's', 'green', 'Ketapril'),
    (placebo_micecount_mung_list, 'D', 'black', 'Placebo'),
]

for counts, marker, color, label in survival_series:
    # The axis is labelled as a percentage, so express every count relative
    # to the count at the first timepoint instead of plotting raw counts.
    initial_count = counts[0]
    survival_pct = [count / initial_count * 100 for count in counts]
    plt.plot(x_axis, survival_pct, marker=marker, linestyle=':',
             color=color, label=label)

plt.title("Survival During Treatment")
plt.xlabel('Time in Days')
plt.ylabel('Survival Rate (%)')
plt.grid()
plt.legend()

# Save the Figure (alongside the other figures in Images/)
plt.savefig('Images/survival-during-treatment.png')

# Show the Figure
plt.show()



In [None]:
# Calculate the percent changes for each drug
mean_tumor_vol_df = mean_tumor_volume_df.loc[:, 'Tumor Volume (mm3)']

drugs = ['Capomulin', 'Ceftamin', 'Infubinol',
         'Ketapril', 'Naftisol', 'Placebo',
         'Propriva', 'Ramicane', 'Stelasyn', 'Zoniferol']

# Reshape to one row per drug and one column per timepoint so the change from
# timepoint 0 to timepoint 45 is computed for every drug in one vectorised
# step instead of ten copy-pasted expressions.
volume_by_timepoint = mean_tumor_vol_df.unstack('Timepoint')
start_volume = volume_by_timepoint[0]
end_volume = volume_by_timepoint[45]
perc_change = ((end_volume - start_volume) / start_volume * 100).round(2)

# Per-drug scalars, kept under their original names for use by other cells
capomulin_perc_change = perc_change['Capomulin']
ceftamin_perc_change = perc_change['Ceftamin']
infubinol_perc_change = perc_change['Infubinol']
ketapril_perc_change = perc_change['Ketapril']
naftisol_perc_change = perc_change['Naftisol']
placebo_perc_change = perc_change['Placebo']
propriva_perc_change = perc_change['Propriva']
ramicane_perc_change = perc_change['Ramicane']
stelasyn_perc_change = perc_change['Stelasyn']
zoniferol_perc_change = perc_change['Zoniferol']

# Summary table of the percent change for every drug. It is built here but
# not rendered: a notebook cell only displays its final expression.
perc_change_list = [perc_change[drug] for drug in drugs]
perc_change_df = pd.DataFrame(perc_change_list, columns=['Percent Change'], index=drugs)

# Store all Relevant Percent Changes into a Tuple
perc_change_tuple = (capomulin_perc_change, infubinol_perc_change, ketapril_perc_change, placebo_perc_change)

bars = ['Capomulin', 'Infubinol', 'Ketapril', 'Placebo']
height = perc_change_tuple
y = np.arange(len(bars))
width = 1/1.05

# Green for a shrinking tumor (negative change), red otherwise, derived from
# the data rather than hard-coded per bar position
bar_colors = ['green' if change < 0 else 'red' for change in height]
plt.bar(y, height, width, color=bar_colors)
plt.grid()
plt.title("Tumor Change Over 45 Day Treatment")
plt.xlabel('Drugs')
plt.xticks(y, bars)
plt.ylabel('% Tumor Volume Change')

# Annotate each bar with its percent change
for a, b in zip(y, height):
    plt.text(a - 0.10, b, str(b))

# Save the Figure
plt.savefig("Images/tumor-change-over-45-day-treatment.png")

# Show the Figure
plt.show()

In [None]:
# NOTE(review): this cell redraws the same bar chart as the previous cell
# (without saving it) and relies on the *_perc_change variables computed
# there; consider removing one of the two.

# Store all Relevant Percent Changes into a Tuple
perc_change_tuple = (capomulin_perc_change, infubinol_perc_change, ketapril_perc_change, placebo_perc_change)

bars = ['Capomulin', 'Infubinol', 'Ketapril', 'Placebo']
height = perc_change_tuple
y = np.arange(len(bars))
width = 1/1.05

# Green for a shrinking tumor (negative change), red otherwise, derived from
# the data rather than hard-coded per bar position
bar_colors = ['green' if change < 0 else 'red' for change in height]
plt.bar(y, height, width, color=bar_colors)
plt.grid()
plt.title("Tumor Change Over 45 Day Treatment")
plt.xlabel('Drugs')
plt.xticks(y, bars)
plt.ylabel('% Tumor Volume Change')

# Annotate each bar with its percent change
for a, b in zip(y, height):
    plt.text(a - 0.10, b, str(b))

# Show the Figure
plt.show()


# 3 Observable Trends
1. The placebo performs just as well as Ketapril and Infubinol.
2. Capomulin appears to be the most effective drug.
3. Capomulin reduced tumor volume by about 19%.



