In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
from scipy.stats import sem

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)data.cs
mouse_path =os.path.join("..","..","Resources","data","mouse_drug_data.csv")
clinical_path =os.path.join("..","..","Resources","data","clinicaltrial_data.csv")

# Read the Mouse and Drug Data and the Clinical Trial Data
mouse_df=pd.read_csv(mouse_path)
clinical_df=pd.read_csv(clinical_path)
mouse_df=pd.DataFrame(mouse_df)
clinical_df=pd.DataFrame(clinical_df)
# print(mouse_df.head())
# print(clinical_df.head())
# print(mouse_df.shape)
# print(clinical_df.shape)

# Combine the data into a single dataset
merged_df=pd.merge(clinical_df,mouse_df,on="Mouse ID",how="outer")

# Display the data table for preview
print(merged_df.head())
# print(merged_df.shape)

  Mouse ID  Timepoint  Tumor Volume (mm3)  Metastatic Sites       Drug
0     b128          0           45.000000                 0  Capomulin
1     b128          5           45.651331                 0  Capomulin
2     b128         10           43.270852                 0  Capomulin
3     b128         15           43.784893                 0  Capomulin
4     b128         20           42.731552                 0  Capomulin


## Tumor Response to Treatment 

In [2]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint 
tumor_group=merged_df.groupby(['Drug','Timepoint'])

# Convert to DataFrame
tumor_df=pd.DataFrame(tumor_group['Tumor Volume (mm3)'].mean())
tumor_df=tumor_df.reset_index()

# Preview DataFrame
print(tumor_df.head())

        Drug  Timepoint  Tumor Volume (mm3)
0  Capomulin          0           45.000000
1  Capomulin          5           44.266086
2  Capomulin         10           43.084291
3  Capomulin         15           42.064317
4  Capomulin         20           40.716325


In [8]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint
sem_group=tumor_df.groupby(['Timepoint','Drug'],sort=False).sem().unstack(fill_value=0)
print(sem_group.head())

# tumor_sem=tumor_df['Tumor Volume (mm3)'].sem()
# print(tumor_sem)

# Convert to DataFrame

# Preview DataFrame

          Tumor Volume (mm3)                                               \
Drug               Capomulin Ceftamin Infubinol Ketapril Naftisol Placebo   
Timepoint                                                                   
0                        NaN      NaN       NaN      NaN      NaN     NaN   
5                        NaN      NaN       NaN      NaN      NaN     NaN   
10                       NaN      NaN       NaN      NaN      NaN     NaN   
15                       NaN      NaN       NaN      NaN      NaN     NaN   
20                       NaN      NaN       NaN      NaN      NaN     NaN   

                                                
Drug      Propriva Ramicane Stelasyn Zoniferol  
Timepoint                                       
0              NaN      NaN      NaN       NaN  
5              NaN      NaN      NaN       NaN  
10             NaN      NaN      NaN       NaN  
15             NaN      NaN      NaN       NaN  
20             NaN      NaN      NaN    