In [13]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from statistics import stdev
from scipy.stats import sem

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Read the Mouse and Drug Data and the Clinical Trial Data
Mousedrug_df = pd.read_csv(mouse_drug_data_to_load)
Clinicaltrial_df = pd.read_csv(clinical_trial_data_to_load)
# Combine the data into a single dataset
mouse_trial_df = pd.merge(Clinicaltrial_df, Mousedrug_df , on="Mouse ID")

# Display the data table for preview

mouse_trial_df.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin


## Tumor Response to Treatment

In [7]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 
Drug_type = mouse_trial_df[["Timepoint", "Drug", "Tumor Volume (mm3)"]] 
Drug_group = Drug_type.groupby(["Drug", "Timepoint"])
Drug_Mean = Drug_group.mean()
Drug_Mean_df = pd.DataFrame(Drug_Mean).reset_index()
Drug_Mean_df


Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.000000
1,Capomulin,5,44.266086
2,Capomulin,10,43.084291
3,Capomulin,15,42.064317
4,Capomulin,20,40.716325
5,Capomulin,25,39.939528
6,Capomulin,30,38.769339
7,Capomulin,35,37.816839
8,Capomulin,40,36.958001
9,Capomulin,45,36.236114


In [14]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
Tumor_se= Drug_group['Tumor Volume (mm3)'].sem()
#Tumor_se.head()
Tumor_Mean_df = pd.DataFrame(Tumor_se).reset_index()
Tumor_Mean_df
# Convert to DataFrame
#Tumor_se.head()
# Preview DataFrame

#mouse_trial_df["Tumor Volume (mm3)"].groupby(mouse_trial_df("Drug").describe()

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,0.000000
1,Capomulin,5,0.448593
2,Capomulin,10,0.702684
3,Capomulin,15,0.838617
4,Capomulin,20,0.909731
5,Capomulin,25,0.881642
6,Capomulin,30,0.934460
7,Capomulin,35,1.052241
8,Capomulin,40,1.223608
9,Capomulin,45,1.223977


In [10]:
# Minor Data Munging to Re-Format the Data Frames

Timepoint_group = Drug_type.groupby(["Timepoint", "Drug"]) 
#Timepoint.mean()
Timepoint_df = Drug_Mean_df.pivot(index='Timepoint', columns='Drug', values = 'Tumor Volume (mm3)') 
Timepoint_df.head()
# Preview that Reformatting worked
#Test = np.transpose(Drug_group)
#estp = pd.DataFrame(Test)
#Testp


Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266086,46.503051,47.062001,47.389175,46.796098,47.125589,47.248967,43.944859,47.527452,46.851818
10,43.084291,48.285125,49.403909,49.582269,48.69421,49.423329,49.101541,42.531957,49.463844,48.689881
15,42.064317,50.094055,51.296397,52.399974,50.933018,51.359742,51.067318,41.495061,51.529409,50.779059
20,40.716325,52.157049,53.197691,54.920935,53.644087,54.364417,53.346737,40.238325,54.067395,53.170334


In [None]:
# Generate the Plot (with Error Bars)
tick_locations = ["Tumour Volume (mm3)","Timepoint"]
# Save the Figure
Drugs = mouse_trial_df["Drug"]
x_axis = np.arange(0, len(Drugs))
for x in x_axis:
    tick_locations.append(x)
    
# Plot the world average as a line chart - I wanted to use this as an example to complete the xercise.
world_avg, = plt.plot(years, average_unemployment, color="blue", label="World Average" )

# Plot the unemployment values for a single country
country_one, = plt.plot(years, combined_unemployed_data.loc['USA',["2010","2011","2012","2013","2014"]], 
                        color="green",label=combined_unemployed_data.loc['USA',"Country Name"])

# Create a legend for our chart
plt.legend(handles=[world_avg, country_one], loc="best")

# Show the chart
plt.show()    

In [None]:
# Show the Figure
plt.show()

## Metastatic Response to Treatment

In [18]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint 

Meta_type = mouse_trial_df[["Timepoint", "Drug", "Metastatic Sites"]] 
Meta_group = Meta_type.groupby(["Drug", "Timepoint"]) 
Meta_Mean = Meta_group.mean() 
#Meta_Mean.head()
Meta_Mean_df = pd.DataFrame(Meta_Mean).reset_index()
Meta_Mean_df

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.000000
1,Capomulin,5,0.160000
2,Capomulin,10,0.320000
3,Capomulin,15,0.375000
4,Capomulin,20,0.652174
5,Capomulin,25,0.818182
6,Capomulin,30,1.090909
7,Capomulin,35,1.181818
8,Capomulin,40,1.380952
9,Capomulin,45,1.476190


In [22]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint 
Meta_se= Meta_group['Metastatic Sites'].sem()
Meta_Mean_df = pd.DataFrame(Meta_se).reset_index()
Meta_Mean_df
#Met_Sites_std= Meta_group.std()
#Met_Sites_std.head()

Unnamed: 0,Drug,Timepoint,Metastatic Sites
0,Capomulin,0,0.000000
1,Capomulin,5,0.074833
2,Capomulin,10,0.125433
3,Capomulin,15,0.132048
4,Capomulin,20,0.161621
5,Capomulin,25,0.181818
6,Capomulin,30,0.172944
7,Capomulin,35,0.169496
8,Capomulin,40,0.175610
9,Capomulin,45,0.202591


In [25]:
# Minor Data Munging to Re-Format the Data Frames

Meta_group_m = Meta_type.groupby(["Timepoint", "Drug"]) 

Metamunge_df = Meta_Mean_df.pivot(index='Timepoint', columns='Drug', values = 'Metastatic Sites') 
Metamunge_df

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.074833,0.108588,0.091652,0.0981,0.093618,0.100947,0.095219,0.066332,0.087178,0.077709
10,0.125433,0.152177,0.159364,0.142018,0.163577,0.115261,0.10569,0.090289,0.123672,0.109109
15,0.132048,0.180625,0.194015,0.191381,0.158651,0.190221,0.136377,0.115261,0.153439,0.111677
20,0.161621,0.241034,0.234801,0.23668,0.181731,0.234064,0.171499,0.11943,0.200905,0.166378
25,0.181818,0.258831,0.265753,0.288275,0.18524,0.263888,0.199095,0.11943,0.219824,0.236621
30,0.172944,0.249479,0.227823,0.347467,0.266667,0.300264,0.266469,0.139968,0.230641,0.248168
35,0.169496,0.266526,0.224733,0.361418,0.330464,0.341412,0.366667,0.145997,0.240983,0.285714
40,0.17561,0.289128,0.314466,0.315725,0.321702,0.297294,0.433903,0.160591,0.312815,0.299791
45,0.202591,0.286101,0.30932,0.278722,0.351104,0.30424,0.428571,0.190221,0.359062,0.2864


In [None]:
# Generate the Plot (with Error Bars)

# Save the Figure

# Show the Figure


## Survival Rates

In [27]:
# Store the Count of Mice Grouped by Drug and Timepoint (W can pass any metric)

Mouse_Count = mouse_trial_df[["Timepoint", "Drug", "Mouse ID"]] 
Mouse_group = Mouse_Count.groupby(["Drug", "Timepoint"]) 
Mouse_count_group = Mouse_group.count()
#Mouse_count_group.head()
Mouse_count_df = pd.DataFrame(Mouse_count_group).reset_index()
Mouse_count_df

Unnamed: 0,Drug,Timepoint,Mouse ID
0,Capomulin,0,25
1,Capomulin,5,25
2,Capomulin,10,25
3,Capomulin,15,24
4,Capomulin,20,23
5,Capomulin,25,22
6,Capomulin,30,22
7,Capomulin,35,22
8,Capomulin,40,21
9,Capomulin,45,21


In [30]:
# Minor Data Munging to Re-Format the Data Frames

#Mouse_count_m = Meta_type.groupby(["Timepoint", "Drug"]) 

Mousecount_df = Mouse_count_df.pivot(index='Timepoint', columns='Drug', values = 'Mouse ID') 
Mousecount_df

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,25,25,25,25,25,25,26,25,26,25
5,25,21,25,23,23,24,25,25,25,24
10,25,20,21,22,21,24,23,24,23,22
15,24,19,21,19,21,20,17,24,23,21
20,23,18,20,19,20,19,17,23,21,17
25,22,18,18,19,18,17,14,23,19,16
30,22,16,17,18,15,15,13,23,18,15
35,22,14,12,17,15,14,10,21,16,14
40,21,14,10,15,15,12,9,20,12,14
45,21,13,9,11,13,11,7,20,11,14


In [None]:
# Generate the Plot (Accounting for percentages)

# Save the Figure

# Show the Figure
plt.show()

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


In [None]:
# Store all Relevant Percent Changes into a Tuple


# Splice the data between passing and failing drugs


# Orient widths. Add labels, tick marks, etc. 


# Use functions to label the percentages of changes


# Call functions to implement the function calls


# Save the Figure


# Show the Figure
fig.show()