In [1]:
%matplotlib notebook

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"
mouse_drug_data_to_load = "data/mouse_drug_data.csv"

clinical_data = pd.read_csv(clinical_trial_data_to_load)
mouse_data = pd.read_csv(mouse_drug_data_to_load)



In [5]:
clinical_data

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.000000,0
1,f932,0,45.000000,0
2,g107,0,45.000000,0
3,a457,0,45.000000,0
4,c819,0,45.000000,0
...,...,...,...,...
1888,r944,45,41.581521,2
1889,u364,45,31.023923,3
1890,p438,45,61.433892,1
1891,x773,45,58.634971,4


In [6]:
mouse_data

Unnamed: 0,Mouse ID,Drug
0,f234,Stelasyn
1,x402,Stelasyn
2,a492,Stelasyn
3,w540,Stelasyn
4,v764,Stelasyn
...,...,...
245,i669,Placebo
246,r850,Placebo
247,a262,Placebo
248,q787,Placebo


In [7]:
combined_clinical_mouse_data = pd.merge(clinical_data, mouse_data, how ="outer", on="Mouse ID")
combined_clinical_mouse_data.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug
0,b128,0,45.0,0,Capomulin
1,b128,5,45.651331,0,Capomulin
2,b128,10,43.270852,0,Capomulin
3,b128,15,43.784893,0,Capomulin
4,b128,20,42.731552,0,Capomulin


In [8]:
drug_time_group = school_group = combined_clinical_mouse_data.groupby(['Drug','Timepoint'])
drug_time_group

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x114bbebd0>

In [9]:
tumor_volume_mean = round(drug_time_group["Tumor Volume (mm3)"].mean(),3)
tumor_volume_mean

Drug       Timepoint
Capomulin  0            45.000
           5            44.266
           10           43.084
           15           42.064
           20           40.716
                         ...  
Zoniferol  25           55.433
           30           57.714
           35           60.089
           40           62.917
           45           65.961
Name: Tumor Volume (mm3), Length: 100, dtype: float64

In [10]:
tumor_response_1 = pd.DataFrame(tumor_volume_mean)
tumor_response_1 = tumor_response_1.reset_index()
tumor_response_1

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,45.000
1,Capomulin,5,44.266
2,Capomulin,10,43.084
3,Capomulin,15,42.064
4,Capomulin,20,40.716
...,...,...,...
95,Zoniferol,25,55.433
96,Zoniferol,30,57.714
97,Zoniferol,35,60.089
98,Zoniferol,40,62.917


In [11]:
tumor_volume_sem = round(drug_time_group["Tumor Volume (mm3)"].sem(),3)
tumor_volume_sem

Drug       Timepoint
Capomulin  0            0.000
           5            0.449
           10           0.703
           15           0.839
           20           0.910
                        ...  
Zoniferol  25           0.603
           30           0.800
           35           0.881
           40           0.999
           45           1.004
Name: Tumor Volume (mm3), Length: 100, dtype: float64

In [12]:
tumor_response_2 = pd.DataFrame(tumor_volume_sem)
tumor_response_2 = tumor_response_2.reset_index()
tumor_response_2

Unnamed: 0,Drug,Timepoint,Tumor Volume (mm3)
0,Capomulin,0,0.000
1,Capomulin,5,0.449
2,Capomulin,10,0.703
3,Capomulin,15,0.839
4,Capomulin,20,0.910
...,...,...,...
95,Zoniferol,25,0.603
96,Zoniferol,30,0.800
97,Zoniferol,35,0.881
98,Zoniferol,40,0.999


In [13]:
tumor_response_1.pivot(index="Timepoint",columns="Drug",values="Tumor Volume (mm3)")

Drug,Capomulin,Ceftamin,Infubinol,Ketapril,Naftisol,Placebo,Propriva,Ramicane,Stelasyn,Zoniferol
Timepoint,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
5,44.266,46.503,47.062,47.389,46.796,47.126,47.249,43.945,47.527,46.852
10,43.084,48.285,49.404,49.582,48.694,49.423,49.102,42.532,49.464,48.69
15,42.064,50.094,51.296,52.4,50.933,51.36,51.067,41.495,51.529,50.779
20,40.716,52.157,53.198,54.921,53.644,54.364,53.347,40.238,54.067,53.17
25,39.94,54.288,55.715,57.679,56.732,57.483,55.504,38.974,56.166,55.433
30,38.769,56.77,58.299,60.995,59.56,59.809,58.196,38.703,59.827,57.714
35,37.817,58.828,60.742,63.372,62.685,62.421,60.35,37.452,62.441,60.089
40,36.958,61.468,63.163,66.069,65.601,65.053,63.046,36.574,65.356,62.917
45,36.236,64.132,65.756,70.663,69.266,68.084,66.259,34.956,68.438,65.961


In [14]:
#tumor_response_1.plot.scatter(x="Timepoint", y="Drug",c="blue")

fig, ax = plt.scatter("Timepoint", "Drug")
#ax.errorbar(np.arange(0, len(tumor_response_1), 1)+1,tumor_volume_mean, yerr=tumor_volume_sem, fmt="o", color="b",
            #alpha=0.5, label="Mean of House Prices")
#ax.set_xlim(0, len(tumor_response_1)+1)
#ax.set_xlabel("Sample Number")
#ax.set_ylabel("Mean of Median House Prices ($1000)")
#plt.legend(loc="best", fontsize="small", fancybox=True)
#plt.show()

ValueError: scatter requires y column to be numeric