In [None]:
#To do this you are tasked with:

#Creating a scatter plot that shows how the tumor volume changes over time for each treatment.
#Creating a scatter plot that shows how the number of metastatic (cancer spreading) sites changes over time for each treatment.
#Creating a scatter plot that shows the number of mice still alive through the course of treatment (Survival Rate)
#Creating a bar graph that compares the total % tumor volume change for each drug across the full 45 days.

#As final considerations:

#You must use the Pandas Library and the Jupyter Notebook.
#You must use the Matplotlib library.
#You must include a written description of three observable trends based on the data.
#You must use proper labeling of your plots, including aspects like: Plot Titles, Axes Labels, Legend Labels, X and Y Axis Limits, etc.
#Your scatter plots must include error bars. This will allow the company to account for variability between mice. You may want to look into pandas.DataFrame.sem for ideas on how to calculate this.
#Remember when making your plots to consider aesthetics!

#Your legends should not be overlaid on top of any data.
#Your bar graph should indicate tumor growth as red and tumor reduction as green.
#It should also include a label with the percentage change for each bar. You may want to consult this tutorial for relevant code snippets.

In [3]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
mouse_drug_data = "Data/mouse_drug_data.csv"
clinical_trial_data = "Data/clinicaltrial_data.csv"

# Read the Mouse and Drug Data and the Clinical Trial Data

# Import the books.csv file as a DataFrame
mouse_drug_df = pd.read_csv(mouse_drug_data)
clinical_trial_df = pd.read_csv(clinical_trial_data)
mouse_drug_df.head(10)
clinical_trial_df.head(10)

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.0,0
1,f932,0,45.0,0
2,g107,0,45.0,0
3,a457,0,45.0,0
4,c819,0,45.0,0
5,h246,0,45.0,0
6,p189,0,45.0,0
7,n923,0,45.0,0
8,q119,0,45.0,0
9,f993,0,45.0,0


In [4]:
# Combine the data into a single dataset and display the data table for preview
drug_tumor_table_df = pd.merge(mouse_drug_df,clinical_trial_df, on="Mouse ID")
drug_tumor_table_df.head()

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.0,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2


In [6]:
#Tumor Response to Treatment
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint as a dataframe as display
drug_tumor_grouped_df = drug_tumor_table_df.groupby(['Drug','Timepoint'])

drug_tumor_grouped_df.head()

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,f234,Stelasyn,0,45.000000,0
1,f234,Stelasyn,5,47.313491,0
2,f234,Stelasyn,10,47.904324,0
3,f234,Stelasyn,15,48.735197,1
4,f234,Stelasyn,20,51.112713,2
5,f234,Stelasyn,25,52.178602,2
6,f234,Stelasyn,30,56.873817,2
7,f234,Stelasyn,35,57.748758,2
8,x402,Stelasyn,0,45.000000,0
9,x402,Stelasyn,5,46.327570,1


In [10]:
drug_tumor_sort_df = drug_tumor_table_df.sort_values(
    ["Drug", "Timepoint"])
drug_tumor_sort_df.head(25)

Unnamed: 0,Mouse ID,Drug,Timepoint,Tumor Volume (mm3),Metastatic Sites
581,b128,Capomulin,0,45.0,0
591,r944,Capomulin,0,45.0,0
601,s185,Capomulin,0,45.0,0
611,w914,Capomulin,0,45.0,0
621,l897,Capomulin,0,45.0,0
631,b742,Capomulin,0,45.0,0
641,f966,Capomulin,0,45.0,0
646,u364,Capomulin,0,45.0,0
656,j119,Capomulin,0,45.0,0
666,l509,Capomulin,0,45.0,0
