In [4]:
# Dependencies and Setup
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [5]:
# Hide warning messages in notebook
# NOTE(review): with no category given, this silences every warning for the rest of
# the session, including deprecation warnings raised by the libraries used below —
# consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

In [6]:
# Files to load.
# The data directory can be overridden with the PYMACEUTICALS_DATA_DIR environment
# variable so the notebook can run on machines other than the original author's.
# When the variable is unset, the original absolute location is used as the default.
import os

DATA_DIR = os.environ.get(
    "PYMACEUTICALS_DATA_DIR",
    "C:/Users/dbruen/Documents/Data Analytics/CU Bootcamp Homework/05 Matplotlib HW/Pymaceuticals_DanBruen/Pymaceutical_Data",
)
mouse_drug_data_to_load = os.path.join(DATA_DIR, "mouse_drug_data.csv")
clinical_trial_data_to_load = os.path.join(DATA_DIR, "clinical_trial_data.csv")


In [7]:
# Load both CSV sources: the mouse/drug file first, then the clinical trial file
mouse_data, trial_df = (
    pd.read_csv(csv_path)
    for csv_path in (mouse_drug_data_to_load, clinical_trial_data_to_load)
)

In [8]:
# Join the trial rows to the mouse/drug rows on the shared "Mouse ID" key
# (default inner join, so only IDs present in both tables are kept)
drug_data_df = trial_df.merge(mouse_data, on="Mouse ID")

In [9]:
# Preview the first five rows of the merged table to check the join
drug_data_df.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin


In [10]:
# **Tumor Response to Treatment**

In [11]:
# Mean tumor volume for every (Drug, Timepoint) group, flattened so that the
# group keys become ordinary columns of a DataFrame
mean_tumor_vol = (
    drug_data_df
    .groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"]
    .mean()
    .reset_index()
)

In [12]:
# Preview the first rows of the per-drug, per-timepoint mean tumor volume table
mean_tumor_vol.head()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.0
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325


In [13]:
# Row count of the mean table, i.e. the number of (Drug, Timepoint) combinations
len(mean_tumor_vol)

100

In [14]:
# Number of rows in the mean table for each drug (one row per timepoint per drug)
mean_tumor_vol["Drug"].value_counts()

Placebo      10
Ramicane     10
Infubinol    10
Propriva     10
Capomulin    10
Ketapril     10
Naftisol     10
Ceftamin     10
Stelasyn     10
Zoniferol    10
Name: Drug, dtype: int64

In [15]:
# Import dependencies for the standard-error (SEM) calculations
## NOTE(review): the visible cells call pandas' built-in .sem() instead; confirm whether these imports are still needed
from scipy.stats import sem
from random import random

In [16]:
# Standard error of the mean tumor volume for every (Drug, Timepoint) group,
# computed with pandas' GroupBy.sem()
sem_tumor_vol = (
    drug_data_df
    .groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"]
    .sem()
)
sem_tumor_vol

Drug       Timepoint
Capomulin  0            0.000000
           5            0.448593
           10           0.702684
           15           0.838617
           20           0.909731
                          ...   
Zoniferol  25           0.602513
           30           0.800043
           35           0.881426
           40           0.998515
           45           1.003576
Name: Tumor Volume (mm3), Length: 100, dtype: float64

In [17]:
# Reshape the long table to wide form: one row per Timepoint, one column per Drug,
# with the mean tumor volume as the cell value
reformed_mean_tumor_vol = (
    mean_tumor_vol
    .set_index(["Timepoint", "Drug"])["Tumor Volume (mm3)"]
    .unstack("Drug")
)
# Preview to confirm the reshaping worked
reformed_mean_tumor_vol.head()

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334


In [18]:
# Generate the Plot (with Error Bars)
# Creating a scatter plot that shows how the tumor volume changes over time for each treatment


In [19]:
# Drug names, read from the column labels of the wide table
reformed_mean_tumor_vol.columns.tolist()

['Capomulin',
 'Ceftamin',
 'Infubinol',
 'Ketapril',
 'Naftisol',
 'Placebo',
 'Propriva',
 'Ramicane',
 'Stelasyn',
 'Zoniferol']

In [20]:
# Set variables
## Drug names are taken from the wide table's column labels instead of being retyped
## by hand, so the list cannot drift out of sync with the data.
drug_list = reformed_mean_tumor_vol.columns.tolist()

In [21]:
# Save the Figure

In [22]:
# ** Metastatic Response to Treatment **

In [23]:
# Mean number of metastatic sites for every (Drug, Timepoint) group
mean_met = (
    drug_data_df
    .groupby(["Drug", "Timepoint"])["Metastatic Sites"]
    .mean()
)
mean_met.head()

Drug       Timepoint
Capomulin  0            0.000000
           5            0.160000
           10           0.320000
           15           0.375000
           20           0.652174
Name: Metastatic Sites, dtype: float64

In [24]:
# Per-(Drug, Timepoint) mean of metastatic sites, flattened so that the group
# keys become ordinary columns of a DataFrame
mean_met = (
    drug_data_df
    .groupby(["Drug", "Timepoint"])["Metastatic Sites"]
    .mean()
)
mean_met_df = mean_met.reset_index()
mean_met_df.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.16
2,Capomulin,10,0.32
3,Capomulin,15,0.375
4,Capomulin,20,0.652174


In [25]:
# Store the standard error of Metastatic Sites, grouped by Drug and Timepoint
sem_met_sites = (
    drug_data_df
    .groupby(["Drug", "Timepoint"])["Metastatic Sites"]
    .sem()  # pandas' built-in standard error of the mean
)
sem_met_sites

Drug       Timepoint
Capomulin  0            0.000000
           5            0.074833
           10           0.125433
           15           0.132048
           20           0.161621
                          ...   
Zoniferol  25           0.236621
           30           0.248168
           35           0.285714
           40           0.299791
           45           0.286400
Name: Metastatic Sites, Length: 100, dtype: float64

In [26]:
# Flatten the grouped standard errors so Drug and Timepoint become regular columns
sem_met_sites_df = sem_met_sites.reset_index()

In [27]:
# Preview the first rows of the flattened metastatic-site standard-error table
sem_met_sites_df.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.0
1,Capomulin,5,0.074833
2,Capomulin,10,0.125433
3,Capomulin,15,0.132048
4,Capomulin,20,0.161621


In [28]:
# ** Survival Rates **
# Set Dependencies


In [32]:
# Store the count of rows per (Drug, Timepoint) group. Any always-populated column
# could be counted; "Mouse ID" is used here.
# drug_data_df is the merged table built earlier in the notebook; the identical merge
# that used to be repeated in this cell was redundant and has been dropped.
# NOTE(review): count() tallies rows, not distinct mice, so a Mouse ID that appears
# more than once within a group is counted each time — confirm that is intended.
mice_count = drug_data_df.groupby(["Drug", "Timepoint"])["Mouse ID"].count()
mice_count.head()

Drug       Timepoint
Capomulin  0            25
           5            25
           10           25
           15           24
           20           23
Name: Mouse ID, dtype: int64

In [34]:
# Flatten the grouped counts into a DataFrame with Drug and Timepoint as columns
mice_count_df = mice_count.reset_index()

# Show the first rows
mice_count_df.head()

Unnamed: 0,Drug,Timepoint,Mouse ID
0,Capomulin,0,25
1,Capomulin,5,25
2,Capomulin,10,25
3,Capomulin,15,24
4,Capomulin,20,23


In [36]:
# Reshape the counts to wide form: one row per Timepoint, one column per Drug
reformed_mice_count = mice_count_df.pivot(index="Timepoint",
                                          columns="Drug",
                                          values="Mouse ID")
# Backward-compatible alias: the original, misspelt name is kept so that any cell
# still referring to it keeps working.
reformed_mince_count = reformed_mice_count
# Preview the Data Frame
reformed_mice_count.head()

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17


In [None]:
# Generate the Plot (Accounting for percentages)



In [None]:
# Save the Figure

# Show the Figure

In [37]:
# ** Summary Bar Graph **

In [38]:
 # Calculate the percent changes for each drug



In [39]:
# Display the data to confirm


In [None]:
# Written description of three observable trends based on the data.