In [101]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import sem
from random import random

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# Relative paths of the two input CSV files
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Load each CSV into its own DataFrame
file1 = pd.read_csv(mouse_drug_data_to_load)
file2 = pd.read_csv(clinical_trial_data_to_load)

# Outer-join the two tables on "Mouse ID" so that rows from either file
# are kept even when the other file has no matching mouse.
raw_data_df = file1.merge(file2, how="outer", on="Mouse ID")

# Preview the first two rows of the combined dataset
raw_data_df.head(2)

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0


## Tumor Response to Treatment

In [102]:
# Mean tumor volume for every (Drug, Timepoint) pair, as a flat table
# with the columns Drug, Timepoint and "Avg Tumor Vol (mm3)".
mean_tumor_df = (
    raw_data_df
    .groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"]
    .mean()
    .rename("Avg Tumor Vol (mm3)")
    .reset_index()  # move Drug/Timepoint out of the index into columns
)
mean_tumor_df.head(2)

Unnamed: 0,Drug,Timepoint,Avg Tumor Vol (mm3)
0,Capomulin,0,45.0
1,Capomulin,5,44.266086


In [103]:
# Standard error of the mean tumor volume for every (Drug, Timepoint) pair.
# The result stays indexed by (Drug, Timepoint) and has a single column
# named "Tumor Volume (mm3)".
se_tumor_df = (
    raw_data_df
    .groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"]
    .sem()
    .to_frame()
)
se_tumor_df.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Tumor Volume (mm3)
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.448593


In [121]:
# Reshape the long table of means into wide form: one row per Timepoint and
# one column per Drug, each cell holding the average tumor volume.
# Background reading on data munging with pandas:
# https://www.analyticsvidhya.com/blog/2014/09/data-munging-python-using-pandas-baby-steps-python/
mean1_tumor_df = mean_tumor_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Avg Tumor Vol (mm3)",
)

# Preview that the reformatting worked
mean1_tumor_df.head(2)

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818


In [109]:
# Generate the Plot (with Error Bars)
#
# The plot needs wide tables (rows = Timepoint, columns = Drug):
#   * mean1_tumor_df is already in that shape (the long-format mean_tumor_df
#     has no per-drug columns, so indexing it by drug name raises KeyError);
#   * se_tumor_df is indexed by (Drug, Timepoint) with a single
#     "Tumor Volume (mm3)" column, so it is unstacked here to match.
tumor_sem_by_drug = se_tumor_df["Tumor Volume (mm3)"].unstack("Drug")

# The index of the pivoted frame holds the Timepoint values shared by all drugs
x_axis = mean1_tumor_df.index

# (drug, color, marker): distinct styling so the series can be told apart
tumor_series_styles = [
    ("Capomulin", "red", "o"),
    ("Infubinol", "blue", "^"),
]

fig, ax = plt.subplots()
for drug_name, line_color, line_marker in tumor_series_styles:
    ax.errorbar(
        x_axis,
        mean1_tumor_df[drug_name],
        yerr=tumor_sem_by_drug[drug_name],
        marker=line_marker,
        color=line_color,
        label=drug_name,
    )

ax.set_title("Tumor Response To Treatment")
ax.set_xlabel("Time (Days)")
ax.set_ylabel("Tumor Volume (mm3)")
ax.legend(loc="best", fontsize="small", fancybox=True);

# TODO: Save the Figure (no output path has been chosen yet)



In [None]:
# Show the Figure
# plt.show() displays every matplotlib figure that is currently open;
# this cell does not create a figure of its own.
plt.show()

## Metastatic Response to Treatment

In [126]:
# Mean number of metastatic sites for every (Drug, Timepoint) pair, as a flat
# table with the columns Drug, Timepoint and "Avg Metastatic Sites".
mean_metastatic_df = (
    raw_data_df
    .groupby(["Drug", "Timepoint"])["Metastatic Sites"]
    .mean()
    .to_frame("Avg Metastatic Sites")
    .reset_index()
)
mean_metastatic_df.head(2)

Unnamed: 0,Drug,Timepoint,Avg Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.16


In [97]:
# Standard error of the metastatic-site count for every (Drug, Timepoint)
# pair. The frame stays indexed by (Drug, Timepoint) and has one column,
# "SE Metastatic Sites".
se_metastatic_df = (
    raw_data_df
    .groupby(["Drug", "Timepoint"])["Metastatic Sites"]
    .sem()
    .to_frame("SE Metastatic Sites")
)
se_metastatic_df.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,SE Metastatic Sites
Drug,Timepoint,Unnamed: 2_level_1
Capomulin,0,0.0
Capomulin,5,0.074833


In [127]:
# Minor Data Munging to Re-Format the Data Frames
#
# Wide table of the mean metastatic-site count: one row per Timepoint, one
# column per Drug. It is built straight from raw_data_df instead of pivoting
# mean_metastatic_df onto itself: the self-overwriting form fails with a
# KeyError when the cell is run a second time, because the frame no longer
# has the "Drug"/"Timepoint" columns after the first run.
mean_metastatic_df = (
    raw_data_df
    .groupby(["Drug", "Timepoint"])["Metastatic Sites"]
    .mean()
    .unstack("Drug")  # Drug level -> columns, Timepoint stays as the index
)
mean_metastatic_df.head(2)

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.16,0.380952,0.28,0.304348,0.26087,0.375,0.32,0.12,0.24,0.166667


In [131]:
# Generate the Plot (with Error Bars)


## Survival Rates

In [141]:
# Number of rows with a non-null "Mouse ID" for every (Drug, Timepoint) pair,
# returned as a flat table with the columns Drug, Timepoint and "Count Mice".
count_mice_df = (
    raw_data_df
    .groupby(["Drug", "Timepoint"])["Mouse ID"]
    .count()
    .to_frame("Count Mice")
    .reset_index()
)
count_mice_df.head(2)

Unnamed: 0,Drug,Timepoint,Count Mice
0,Capomulin,0,25
1,Capomulin,5,25


In [142]:
# Minor Data Munging to Re-Format the Data Frames
#
# Wide table of mouse counts: one row per Timepoint, one column per Drug.
# It is computed directly from raw_data_df so the cell can be re-run safely;
# pivoting count_mice_df onto itself raises a KeyError on the second run
# because the "Drug"/"Timepoint" columns are gone after the first pivot.
count_mice_df = (
    raw_data_df
    .groupby(["Drug", "Timepoint"])["Mouse ID"]
    .count()
    .unstack("Drug")  # Drug level -> columns, Timepoint stays as the index
)
count_mice_df.head(2)

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24


In [None]:
# Generate the Plot (Accounting for percentages)
# TODO: not implemented yet -- this cell contains no plotting calls, so no
# survival-rate figure is created here.

# Save the Figure
# TODO: not implemented yet.

# Show the Figure
# Displays whatever figures are currently open; none is created in this cell.
plt.show()

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


In [None]:
# Store all Relevant Percent Changes into a Tuple
# TODO: not implemented yet.


# Splice the data between passing and failing drugs
# TODO: not implemented yet.


# Orient widths. Add labels, tick marks, etc. 
# TODO: not implemented yet.


# Use functions to label the percentages of changes
# TODO: not implemented yet.


# Call functions to implement the function calls
# TODO: not implemented yet.


# Save the Figure
# TODO: not implemented yet.


# Show the Figure
# No `fig` object is created in this cell, so `fig.show()` would raise a
# NameError on a fresh kernel. plt.show() displays whichever figures are open
# and matches how the other plotting cells in this notebook show figures.
plt.show()