In [57]:
%matplotlib notebook
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sts
import numpy as np
mouse_df = pd.read_csv('./Resources/Mouse_metadata.csv')
result_df = pd.read_csv('./Resources/Study_results.csv')

In [58]:
mouse_results = pd.merge(result_df, mouse_df, on ="Mouse ID")
mouse_results

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.000000,0,Capomulin,Female,9,22
1,b128,5,45.651331,0,Capomulin,Female,9,22
2,b128,10,43.270852,0,Capomulin,Female,9,22
3,b128,15,43.784893,0,Capomulin,Female,9,22
4,b128,20,42.731552,0,Capomulin,Female,9,22
...,...,...,...,...,...,...,...,...
1888,m601,25,33.118756,1,Capomulin,Male,22,17
1889,m601,30,31.758275,1,Capomulin,Male,22,17
1890,m601,35,30.834357,1,Capomulin,Male,22,17
1891,m601,40,31.378045,1,Capomulin,Male,22,17


In [59]:
#drop duplicate timepoint
dropped_dupl = mouse_results.drop_duplicates(["Mouse ID", "Timepoint"]) 
dropped_dupl

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.000000,0,Capomulin,Female,9,22
1,b128,5,45.651331,0,Capomulin,Female,9,22
2,b128,10,43.270852,0,Capomulin,Female,9,22
3,b128,15,43.784893,0,Capomulin,Female,9,22
4,b128,20,42.731552,0,Capomulin,Female,9,22
...,...,...,...,...,...,...,...,...
1888,m601,25,33.118756,1,Capomulin,Male,22,17
1889,m601,30,31.758275,1,Capomulin,Male,22,17
1890,m601,35,30.834357,1,Capomulin,Male,22,17
1891,m601,40,31.378045,1,Capomulin,Male,22,17


In [61]:
#Generate a summary statistics table consisting of the mean, median, variance, standard deviation, 
#and SEM of the tumor volume for each drug regimen.
drug_tumor = dropped_dupl[["Tumor Volume (mm3)", "Drug Regimen"]]
drug_df = drug_tumor.groupby(["Drug Regimen"])
mean_tumor = drug_df.mean()
mean_tumor = mean_tumor.rename(columns ={"Tumor Volume (mm3)": "Mean Tumor Volume (mm3)"})
mean_tumor

Unnamed: 0_level_0,Mean Tumor Volume (mm3)
Drug Regimen,Unnamed: 1_level_1
Capomulin,40.675741
Ceftamin,52.591172
Infubinol,52.884795
Ketapril,55.235638
Naftisol,54.331565
Placebo,54.033581
Propriva,52.393463
Ramicane,40.216745
Stelasyn,54.233149
Zoniferol,53.236507


In [65]:
median_tumor = drug_df.median()
median_tumor = median_tumor.rename(columns ={"Tumor Volume (mm3)": "Median Tumor Volume (mm3)"})
median_tumor

Unnamed: 0_level_0,Median Tumor Volume (mm3)
Drug Regimen,Unnamed: 1_level_1
Capomulin,41.557809
Ceftamin,51.776157
Infubinol,51.820584
Ketapril,53.698743
Naftisol,52.509285
Placebo,52.288934
Propriva,50.909965
Ramicane,40.673236
Stelasyn,52.431737
Zoniferol,51.818479


In [67]:
variance_tumor = drug_df.var()
variance_tumor = variance_tumor.rename(columns ={"Tumor Volume (mm3)": "Variance Tumor Volume (mm3)"})
variance_tumor

Unnamed: 0_level_0,Variance Tumor Volume (mm3)
Drug Regimen,Unnamed: 1_level_1
Capomulin,24.947764
Ceftamin,39.290177
Infubinol,43.128684
Ketapril,68.553577
Naftisol,66.173479
Placebo,61.168083
Propriva,43.138803
Ramicane,23.486704
Stelasyn,59.450562
Zoniferol,48.533355


In [71]:
std_tumor = drug_df.std()
std_tumor = std_tumor.rename(columns ={"Tumor Volume (mm3)": "Standard Deviation Tumor Volume (mm3)"})
std_tumor

Unnamed: 0_level_0,Standard Deviation Tumor Volume (mm3)
Drug Regimen,Unnamed: 1_level_1
Capomulin,4.994774
Ceftamin,6.268188
Infubinol,6.567243
Ketapril,8.279709
Naftisol,8.134708
Placebo,7.821003
Propriva,6.568014
Ramicane,4.846308
Stelasyn,7.710419
Zoniferol,6.966589


In [72]:
SEM_tumor = drug_df.sem()
SEM_tumor = SEM_tumor.rename(columns ={"Tumor Volume (mm3)": "SEM Tumor Volume (mm3)"})
SEM_tumor

Unnamed: 0_level_0,SEM Tumor Volume (mm3)
Drug Regimen,Unnamed: 1_level_1
Capomulin,0.329346
Ceftamin,0.469821
Infubinol,0.492236
Ketapril,0.60386
Naftisol,0.596466
Placebo,0.581331
Propriva,0.525862
Ramicane,0.320955
Stelasyn,0.573111
Zoniferol,0.516398


In [74]:
merged1 = pd.merge(std_tumor, SEM_tumor, on="Drug Regimen")
merged2 = pd.merge(median_tumor, merged1, on="Drug Regimen")
merged3 = pd.merge()

Unnamed: 0_level_0,Standard Deviation Tumor Volume (mm3),SEM Tumor Volume (mm3)
Drug Regimen,Unnamed: 1_level_1,Unnamed: 2_level_1
Capomulin,4.994774,0.329346
Ceftamin,6.268188,0.469821
Infubinol,6.567243,0.492236
Ketapril,8.279709,0.60386
Naftisol,8.134708,0.596466
Placebo,7.821003,0.581331
Propriva,6.568014,0.525862
Ramicane,4.846308,0.320955
Stelasyn,7.710419,0.573111
Zoniferol,6.966589,0.516398


In [12]:
#Generate a bar plot using both Pandas’s DataFrame.plot() and Matplotlib’s pyplot that shows the total number of measurements taken for each treatment regimen throughout the course of the study.
ID_group = dropped_dupl.groupby(["Mouse ID"])
num_measurements = ID_group.count()
num_measurements = num_measurements[["Timepoint"]]
num_measurements = num_measurements.rename(columns = {"Timepoint": "Number of Measurements"})
num_measurements
measurement_chart = num_measurements.plot(kind="bar", title = "Measurements for each speciment")

plt.show()
plt.tight_layout()

Unnamed: 0_level_0,Number of Measurements
Mouse ID,Unnamed: 1_level_1
a203,10
a251,10
a262,10
a275,10
a366,7
...,...
z435,3
z578,10
z581,10
z795,10


In [14]:
#Generate a pie plot using both Pandas’s DataFrame.plot() and Matplotlib’s pyplot that shows the distribution of female or male mice in the study.

gender_mouse = mouse_results.groupby(["Sex", "Mouse ID"])
gender = gender_mouse.count()
gender_count = gender.groupby(["Sex"]) 
counted = gender_count.count()
counted = counted[["Timepoint"]]
counted = counted.rename(columns = {"Timepoint": "Number of Mice"})
counted


Unnamed: 0_level_0,Number of Mice
Sex,Unnamed: 1_level_1
Female,124
Male,125


In [31]:
counted_pie = counted.plot(kind = "pie", y="Number of Mice", title = "Numbers by Gender")
plt.show()

<IPython.core.display.Javascript object>

In [32]:
Sex = ["Male", "Female"]
Number = [124, 125]
explode = [0, 0]
chart2 = plt.pie(Number, explode=explode, labels=Sex,
        autopct="%1.1f%%", shadow=False, startangle=90)
plt.show()

In [None]:
#Calculate the final tumor volume of each mouse across four of the most promising treatment regimens: Capomulin, Ramicane, Infubinol, and Ceftamin. Calculate the quartiles and IQR and quantitatively determine if there are any potential outliers across all four treatment regimens.

In [None]:
#Using Matplotlib, generate a box and whisker plot of the final tumor volume for all four treatment regimens and highlight any potential outliers in the plot by changing their color and style.

In [None]:
#Select a mouse that was treated with Capomulin and generate a line plot of tumor volume vs. time point for that mouse.

In [None]:
#Generate a scatter plot of mouse weight versus average tumor volume for the Capomulin treatment regimen.

In [None]:
#Calculate the correlation coefficient and linear regression model between mouse weight and average tumor volume for the Capomulin treatment. Plot the linear regression model on top of the previous scatter plot.

In [None]:
#Look across all previously generated figures and tables and write at least three observations or inferences that can be made from the data. Include these observations at the top of notebook.