In [1]:
# Dependencies and Setup
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# File to Load (Remember to Change These)
mouse_drug_data_to_load = "data/mouse_drug_data.csv"
clinical_trial_data_to_load = "data/clinicaltrial_data.csv"

# Drugs compared throughout the analysis; rows for any other drug are dropped.
drugs_of_interest = ["Capomulin", "Infubinol", "Ketapril", "Placebo"]

# Read the Mouse and Drug Data and the Clinical Trial Data.
# The path variables above are used here so that editing them actually
# changes which files are loaded.
mouse_drug = pd.read_csv(mouse_drug_data_to_load)
clinical_trial = pd.read_csv(clinical_trial_data_to_load)

# Combine the data into a single dataset (outer join on "Mouse ID")
mcc = pd.merge(clinical_trial, mouse_drug, how='outer', on='Mouse ID')

# Keep only the rows for the drugs of interest. .copy() makes only_four an
# independent frame rather than a slice tied to mcc.
only_four = mcc.loc[mcc["Drug"].isin(drugs_of_interest), :].copy()

# Display the data table for preview (first rows only, not the whole frame)
only_four.head()

ModuleNotFoundError: No module named 'matplotlib'

## Tumor Response to Treatment

In [None]:
# Store the Mean Tumor Volume Data Grouped by Drug and Timepoint.
# numeric_only=True restricts the mean to numeric columns: only_four still
# carries the string column "Mouse ID", and averaging it raises a TypeError
# on pandas >= 2.0 (older versions silently dropped it).
# reset_index() turns the Drug/Timepoint group keys back into ordinary columns.
combine_group_mean = (
    only_four.groupby(["Drug", "Timepoint"])
    .mean(numeric_only=True)
    .reset_index()
)

# Convert to DataFrame
tumor_response_mean_df = pd.DataFrame(combine_group_mean)
organized_tumor_response = tumor_response_mean_df[["Drug", "Timepoint", "Tumor Volume (mm3)"]]
# Preview DataFrame (first rows only)
organized_tumor_response.head()

In [None]:
# Store the Standard Error of Tumor Volumes Grouped by Drug and Timepoint.
# numeric_only=True keeps the string column "Mouse ID" out of the calculation;
# without it sem() raises a TypeError on pandas >= 2.0.
# reset_index() turns the Drug/Timepoint group keys back into ordinary columns.
combine_group_sem = (
    only_four.groupby(["Drug", "Timepoint"])
    .sem(numeric_only=True)
    .reset_index()
)

# Convert to DataFrame
tumor_response_sem_tumorvol_df = pd.DataFrame(combine_group_sem)
organized_tumor = tumor_response_sem_tumorvol_df[["Drug", "Timepoint", "Tumor Volume (mm3)"]]
# Preview DataFrame
organized_tumor.head()

In [None]:
# Reshape the mean tumor volumes: one row per Timepoint, one column per Drug,
# then move Timepoint from the index back into a regular column.
pivot_table = tumor_response_mean_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Tumor Volume (mm3)",
).reset_index()

# Preview the formatted table
pivot_table.head()

In [None]:
# Generate the Plot (with Error Bars)

# Standard error of the tumor volume per (Drug, Timepoint), reshaped to one
# column per drug and re-ordered to follow pivot_table's Timepoint column so
# each error value lines up with the mean it belongs to.
tumor_sem_by_drug = (
    only_four.groupby(["Drug", "Timepoint"])["Tumor Volume (mm3)"]
    .sem()
    .unstack("Drug")
    .reindex(pivot_table["Timepoint"])
)

# drug -> (color, marker); a distinct pair per drug so the four series can be
# told apart (previously three of them were all red circles).
tumor_plot_styles = {
    "Capomulin": ("g", "o"),
    "Infubinol": ("r", "^"),
    "Ketapril": ("b", "s"),
    "Placebo": ("k", "d"),
}

plt.figure(figsize=(10, 5))

for drug, (color, marker) in tumor_plot_styles.items():
    plt.errorbar(
        x=pivot_table["Timepoint"],
        y=pivot_table[drug],
        yerr=tumor_sem_by_drug[drug].to_numpy(),
        linestyle="--",
        fmt=marker,
        color=color,
        capsize=3,
        label=drug,
    )

plt.ylabel('Tumor Volume(mm3)')
plt.xlabel('Time (Days)')
plt.title('Tumor Response to Treatment')
plt.legend(loc="best")
plt.grid()

In [None]:
# Show the Figure
# NOTE(review): this call lives in a different cell from the one that builds
# the figure. With `%matplotlib inline` the figure has presumably already been
# rendered under the plotting cell, leaving nothing for this call to display;
# TODO confirm and consider moving plt.show() into the plotting cell.
plt.show()

![Tumor Response to Treatment](../Images/treatment.png)

## Metastatic Response to Treatment

In [None]:
# Store the Mean Met. Site Data Grouped by Drug and Timepoint.
# numeric_only=True keeps the string column "Mouse ID" out of the average;
# without it mean() raises a TypeError on pandas >= 2.0.
combine_group_mean_met = only_four.groupby(["Drug", "Timepoint"]).mean(numeric_only=True)

# Convert to DataFrame (single "Metastatic Sites" column, still indexed by
# Drug and Timepoint)
met_response_mean_df = pd.DataFrame(combine_group_mean_met["Metastatic Sites"])

# Preview DataFrame
met_response_mean_df.head()

In [None]:
# Store the Standard Error associated with Met. Sites Grouped by Drug and Timepoint.
# numeric_only=True keeps the string column "Mouse ID" out of the calculation;
# without it sem() raises a TypeError on pandas >= 2.0.
# NOTE: combine_group_sem, tumor_response_sem_tumorvol_df and organized_tumor
# are the same names used in the tumor-volume section, so running this cell
# replaces those earlier results. The names are kept as-is for compatibility.
combine_group_sem = (
    only_four.groupby(["Drug", "Timepoint"])
    .sem(numeric_only=True)
    .reset_index()
)

# Convert to DataFrame
tumor_response_sem_tumorvol_df = pd.DataFrame(combine_group_sem)
organized_tumor = tumor_response_sem_tumorvol_df[["Drug", "Timepoint", "Metastatic Sites"]]
# Preview DataFrame
organized_tumor.head()

In [None]:
# Minor Data Munging to Re-Format the Data Frames
# Build the pivot from the metastatic-site means themselves. The previous
# version read combine_group_mean (the frame from the tumor-volume section),
# which only worked because that frame happens to carry a "Metastatic Sites"
# column too. reset_index() returns a new frame instead of mutating
# met_response_mean_df in place, so this cell gives the same result when re-run.
met_response_mean_df2 = met_response_mean_df.reset_index()

# One row per Timepoint, one column per Drug; Timepoint is then moved from the
# index back into a regular column.
pivot_table_met = met_response_mean_df2.pivot(
    index="Timepoint",
    columns="Drug",
    values="Metastatic Sites",
).reset_index()

# Preview that Reformatting worked
pivot_table_met.head()

In [None]:
# Standard error of the metastatic-site counts per (Drug, Timepoint), reshaped
# to one column per drug and re-ordered to follow pivot_table_met's Timepoint
# column so each error value lines up with the mean it belongs to.
# (The previous code passed `None['Capomulin']` as yerr, which raises
# TypeError, and drew no error bars for the other drugs.)
met_sem_by_drug = (
    only_four.groupby(["Drug", "Timepoint"])["Metastatic Sites"]
    .sem()
    .unstack("Drug")
    .reindex(pivot_table_met["Timepoint"])
)

plt.figure(figsize=(10, 5))

# One error-bar series per drug; colors come from matplotlib's default cycle.
for drug in ["Capomulin", "Infubinol", "Ketapril", "Placebo"]:
    plt.errorbar(
        x=pivot_table_met["Timepoint"],
        y=pivot_table_met[drug],
        yerr=met_sem_by_drug[drug].to_numpy(),
        linestyle="--",
        fmt='o',
        capsize=3,
        label=drug,
    )

plt.ylabel("Met Sites")
plt.xlabel('Time (Days)')
plt.title('Metastatic Response to Treatment')
plt.legend(loc="best")
plt.show()

![Metastatic Spread During Treatment](../Images/spread.png)

## Survival Rates

In [None]:
# Per (Drug, Timepoint) group, count the entries in every remaining column.
micecount = only_four.groupby(["Drug", "Timepoint"]).count()

# Keep just the "Mouse ID" counts as a one-column DataFrame and turn the
# Drug/Timepoint group keys back into ordinary columns.
micecount_df = micecount[["Mouse ID"]].reset_index()

# Preview the result
micecount_df.head()

In [None]:
# Reshape the mouse counts: one row per Timepoint, one column per Drug,
# then move Timepoint from the index back into a regular column.
pivot_table_mice = micecount_df.pivot(
    index="Timepoint",
    columns="Drug",
    values="Mouse ID",
).reset_index()

# Preview the formatted table
pivot_table_mice.head()

In [None]:
# Start from a float copy of the mouse-count pivot; the percentage columns
# are appended to it below.
survival_fourdrugs_df = pivot_table_mice.astype(float)

# For each drug, "<drug>_percent" is the count in every row divided by the
# count in the first row, times 100.
for drug in ("Capomulin", "Infubinol", "Ketapril", "Placebo"):
    counts = pivot_table_mice[drug]
    survival_fourdrugs_df[drug + "_percent"] = counts / counts.iloc[0] * 100

survival_fourdrugs_df

![Survival During Treatment](../Images/survival.png)

## Summary Bar Graph

In [None]:
# Calculate the percent changes for each drug

# Display the data to confirm


In [None]:
# Store all Relevant Percent Changes
# The previous version read from `table_fourdrugs` and `testdrugs`, neither of
# which is defined anywhere in the notebook. The mean tumor volumes live in
# `pivot_table` (one row per Timepoint, one column per Drug), and the drug
# list is now defined here.
testdrugs = ["Capomulin", "Infubinol", "Ketapril", "Placebo"]

# Percent change in mean tumor volume between the first and last rows of
# pivot_table (assumes its rows are in ascending Timepoint order, as produced
# by the pivot step). .iloc[-1] replaces the hard-coded row position 9.
first_volume = pivot_table[testdrugs].iloc[0]
last_volume = pivot_table[testdrugs].iloc[-1]
percentchange_tumorvolume = (last_volume - first_volume) / first_volume * 100

# Same values as a plain {drug: percent change} mapping (name kept from the
# original notebook even though it is a dict, not a tuple).
percent_tuple = percentchange_tumorvolume.to_dict()

# Splice the data between passing and failing drugs:
# red for tumor growth (change >= 0), green for shrinkage.
colors = ['r' if value >= 0 else 'g' for value in percentchange_tumorvolume]

# Orient widths. Add labels, tick marks, etc.
fig, summary_bar = plt.subplots()
x_axis = np.arange(0, len(testdrugs))

# Bars are edge-aligned with width 1, so x + 0.5 is the centre of each bar.
percent_change = summary_bar.bar(
    x_axis,
    percentchange_tumorvolume,
    width=1,
    color=colors,
    edgecolor="black",
    align="edge",
)
tick_locations = [x + 0.5 for x in x_axis]
plt.xticks(tick_locations, testdrugs)

plt.title("Tumor Change Over 45 Days Treatment")
plt.ylabel("% Tumor Volume Change")
plt.xlim(-0.25, len(testdrugs))
plt.ylim(-30, max(percentchange_tumorvolume) + 20)
plt.grid()

plt.show()


![Tumor Change Over 45 Day Treatment](../Images/change.png)