In [26]:
%matplotlib widget

## Observations and Insights 

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as sem
import numpy as np
from scipy.stats import linregress

# Study data files
mouse_metadata_path = "data/Mouse_metadata.csv"
study_results_path = "data/Study_results.csv"

# Read the mouse data and the study results
mouse_metadata = pd.read_csv(mouse_metadata_path)
study_results = pd.read_csv(study_results_path)

# Combine the data into a single dataset
MergedMouse = pd.merge(study_results, mouse_metadata, on=["Mouse ID"], how="outer")
# Display the data table for preview
mouse_metadata
study_results


Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites
0,b128,0,45.000000,0
1,f932,0,45.000000,0
2,g107,0,45.000000,0
3,a457,0,45.000000,0
4,c819,0,45.000000,0
...,...,...,...,...
1888,r944,45,41.581521,2
1889,u364,45,31.023923,3
1890,p438,45,61.433892,1
1891,x773,45,58.634971,4


In [3]:
mouse_metadata.head()

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g)
0,k403,Ramicane,Male,21,16
1,s185,Capomulin,Female,3,17
2,x401,Capomulin,Female,16,15
3,m601,Capomulin,Male,22,17
4,g791,Ramicane,Male,11,16


In [4]:
MergedMouse

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.000000,0,Capomulin,Female,9,22
1,b128,5,45.651331,0,Capomulin,Female,9,22
2,b128,10,43.270852,0,Capomulin,Female,9,22
3,b128,15,43.784893,0,Capomulin,Female,9,22
4,b128,20,42.731552,0,Capomulin,Female,9,22
...,...,...,...,...,...,...,...,...
1888,m601,25,33.118756,1,Capomulin,Male,22,17
1889,m601,30,31.758275,1,Capomulin,Male,22,17
1890,m601,35,30.834357,1,Capomulin,Male,22,17
1891,m601,40,31.378045,1,Capomulin,Male,22,17


In [5]:
# Checking the number of mice.
TotalMice=MergedMouse["Mouse ID"].nunique()
TotalMice

249

In [6]:
# Optional: Get all the data for the duplicate mouse ID. 
duplicates = MergedMouse[MergedMouse.duplicated(["Mouse ID", "Timepoint"])]
duplicates

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
861,g989,0,45.0,0,Propriva,Female,21,26
863,g989,5,47.570392,0,Propriva,Female,21,26
865,g989,10,49.880528,0,Propriva,Female,21,26
867,g989,15,53.44202,0,Propriva,Female,21,26
869,g989,20,54.65765,1,Propriva,Female,21,26


In [7]:
CleanMerged = MergedMouse.loc[MergedMouse["Mouse ID"] != "g989", :]
CleanMerged

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.000000,0,Capomulin,Female,9,22
1,b128,5,45.651331,0,Capomulin,Female,9,22
2,b128,10,43.270852,0,Capomulin,Female,9,22
3,b128,15,43.784893,0,Capomulin,Female,9,22
4,b128,20,42.731552,0,Capomulin,Female,9,22
...,...,...,...,...,...,...,...,...
1888,m601,25,33.118756,1,Capomulin,Male,22,17
1889,m601,30,31.758275,1,Capomulin,Male,22,17
1890,m601,35,30.834357,1,Capomulin,Male,22,17
1891,m601,40,31.378045,1,Capomulin,Male,22,17


In [8]:
# Checking the number of mice in the clean DataFrame.
TotalMice=len(CleanMerged["Mouse ID"].unique())
TotalMice

248

In [9]:
# Grouped = CleanMerged.groupby('Drug Regimen')['Tumor Volume (mm3)']
# Grouped.head()

## Summary Statistics

In [10]:
# Generate a summary statistics table of mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

# This method is the most straighforward, creating multiple series and putting them all together at the end.
Summary = CleanMerged
# Use Aggregate to calculate the mean, median, variance, standard deviation, and SEM of the tumor volume for each regimen

Summary = Summary.groupby('Drug Regimen')['Tumor Volume (mm3)'].agg(['count','mean','var', 'std', 'sum', 'sem'])

# FOrmatting
Summary["mean"] = Summary["mean"].map("{:.2f}".format)
Summary["var"] = Summary["var"].map("{:.2f}".format)
Summary["std"] = Summary["std"].map("{:.2f}".format)
Summary["sum"] = Summary["sum"].map("{:.2f}".format)
Summary["sem"] = Summary["sem"].map("{:.3f}".format)

Summary

Unnamed: 0_level_0,count,mean,var,std,sum,sem
Drug Regimen,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Capomulin,230,40.68,24.95,4.99,9355.42,0.329
Ceftamin,178,52.59,39.29,6.27,9361.23,0.47
Infubinol,178,52.88,43.13,6.57,9413.49,0.492
Ketapril,188,55.24,68.55,8.28,10384.3,0.604
Naftisol,186,54.33,66.17,8.13,10105.67,0.596
Placebo,181,54.03,61.17,7.82,9780.08,0.581
Propriva,148,52.32,43.85,6.62,7743.5,0.544
Ramicane,228,40.22,23.49,4.85,9169.42,0.321
Stelasyn,181,54.23,59.45,7.71,9816.2,0.573
Zoniferol,182,53.24,48.53,6.97,9689.04,0.516


## Bar and Pie Charts

In [11]:
# Generate a bar plot showing the total number of mice for each treatment throughout the course of the study using pandas. 

PandasChart = Summary.plot(kind="bar", figsize=(9,5), title='Count of Mice/ Drug Regimen')
plt.tight_layout()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
ResetSummary = Summary.reset_index()
ResetSummary

Unnamed: 0,Drug Regimen,count,mean,var,std,sum,sem
0,Capomulin,230,40.68,24.95,4.99,9355.42,0.329
1,Ceftamin,178,52.59,39.29,6.27,9361.23,0.47
2,Infubinol,178,52.88,43.13,6.57,9413.49,0.492
3,Ketapril,188,55.24,68.55,8.28,10384.3,0.604
4,Naftisol,186,54.33,66.17,8.13,10105.67,0.596
5,Placebo,181,54.03,61.17,7.82,9780.08,0.581
6,Propriva,148,52.32,43.85,6.62,7743.5,0.544
7,Ramicane,228,40.22,23.49,4.85,9169.42,0.321
8,Stelasyn,181,54.23,59.45,7.71,9816.2,0.573
9,Zoniferol,182,53.24,48.53,6.97,9689.04,0.516


In [13]:
%matplotlib widget

# Generate a bar plot showing the total number of mice for each treatment throughout the course of the study using pyplot.

# Set x axis and tick locations
x_axis = np.arange(len(Summary))
tick_locations = [value for value in x_axis]

# Create a list indicating where to write x labels and set figure size to adjust for space
plt.figure(figsize=(9,5))
plt.bar(x_axis, ResetSummary["count"], color='r', alpha=0.5, align="center")
plt.xticks(tick_locations, ResetSummary['Drug Regimen'], rotation="vertical")

# Set x and y limits
plt.xlim(-0.5, len(x_axis)-0.5)
plt.ylim(0, max(ResetSummary["count"])+10)

# Set a Title and labels
plt.title("Count of Mice/ Drug Regimen")
plt.xlabel("Drug Regemin")
plt.ylabel("Mice Count")
plt.tight_layout()



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [14]:
# Generate a pie plot showing the distribution of female versus male mice using pandas
# Its not celar what you mean. Either the genders of all 248 mice or the gender of the mice for amount of treatments

# Gender of the mice using total treatments amounts

Gender_Treatments = CleanMerged.groupby('Sex').count()
Gender_Treatments

PandasGenderChart = Gender_Treatments.plot.pie(y="Mouse ID")
plt.tight_layout()

# Gender of the total mice
Gender_allMice = mouse_metadata
Gender_allMice = mouse_metadata.groupby('Sex').count()

PandasGenderChart2 = Gender_allMice.plot.pie(y="Mouse ID")
plt.tight_layout()

# # Gender_allMice
Gender_Treatments


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Unnamed: 0_level_0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Age_months,Weight (g)
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Female,922,922,922,922,922,922,922
Male,958,958,958,958,958,958,958


In [15]:
# These two plots show differences as the 

In [16]:
%matplotlib widget
# To reset index so I can get it to work in pyplot
Gender_Treatments_i = Gender_Treatments.reset_index()
Gender_Treatments_i

# Generate a pie plot showing the distribution of female versus male mice using pyplot
# Create Labels, values, colours and explode variables to use
labels = Gender_Treatments_i['Sex'].tolist()
values = Gender_Treatments_i['Mouse ID'].tolist()
colors = ["lightcoral", "lightskyblue"]
explode = (0.1, 0) 

plt.title("Mice Gender By All Treatments")
plt.pie(values, labels=labels, colors=colors, explode=explode, autopct="%1.1f%%", shadow=True, startangle=150)
plt.axis("equal")
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [27]:
%matplotlib widget
# To reset index so I can get it to work in pyplot
Gender_Treatments_All = Gender_allMice.reset_index()
Gender_Treatments_All

# Generate a pie plot showing the distribution of female versus male of all treatments using pyplot
# Create Labels, values, colours and explode variables to use
labels_A = Gender_Treatments_All['Sex'].tolist()
values_A = Gender_Treatments_All['Mouse ID'].tolist()
colors = ["lightcoral", "lightskyblue"]
explode = (0.1, 0) 

plt.title("All Mice Gender")
plt.pie(values_A, labels=labels_A, colors=colors, explode=explode, autopct="%1.1f%%", shadow=True, startangle=150)
plt.axis("equal")
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [18]:
CleanMerged

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.000000,0,Capomulin,Female,9,22
1,b128,5,45.651331,0,Capomulin,Female,9,22
2,b128,10,43.270852,0,Capomulin,Female,9,22
3,b128,15,43.784893,0,Capomulin,Female,9,22
4,b128,20,42.731552,0,Capomulin,Female,9,22
...,...,...,...,...,...,...,...,...
1888,m601,25,33.118756,1,Capomulin,Male,22,17
1889,m601,30,31.758275,1,Capomulin,Male,22,17
1890,m601,35,30.834357,1,Capomulin,Male,22,17
1891,m601,40,31.378045,1,Capomulin,Male,22,17


In [19]:
FinalVol = CleanMerged.loc[CleanMerged["Timepoint"] == 45, :]
FinalVol

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
9,b128,45,38.982878,2,Capomulin,Female,9,22
40,h246,45,74.104086,2,Ketapril,Male,13,30
50,p189,45,75.294936,4,Ketapril,Male,8,28
79,b559,45,73.051363,1,Naftisol,Male,20,26
89,y260,45,62.909441,4,Ketapril,Female,7,25
...,...,...,...,...,...,...,...,...
1844,l897,45,38.846876,1,Capomulin,Male,17,19
1857,t565,45,34.455298,0,Capomulin,Female,20,17
1867,i557,45,47.685963,1,Capomulin,Female,1,24
1877,m957,45,33.329098,1,Capomulin,Female,3,19


## Quartiles, Outliers and Boxplots

In [20]:
# Calculate the final tumor volume of each mouse across four of the treatment regimens:  
# Create a list of teh 4 drug regemins
AlldrugList = ['Capomulin', 'Ceftamin', 'Infubinol', 'Ketapril', 'Naftisol', 'Placebo', 'Propriva', 'Ramicane', 'Stelasyn', 'Zoniferol']
DrugList = ['Capomulin', 'Ceftamin', 'Infubinol', 'Ramicane']

#list timepoints
timepoints = np.arange(0, 50, 5)

# Capomulin, Ramicane, Infubinol, and Ceftamin
FinalCapomulin = FinalVol.loc[FinalVol["Drug Regimen"] == "Capomulin", :]
FinalRamicane = FinalVol.loc[FinalVol["Drug Regimen"] == "Ramicane", :]
FinalInfubinol = FinalVol.loc[FinalVol["Drug Regimen"] == "Infubinol", :]
FinalCeftamin = FinalVol.loc[FinalVol["Drug Regimen"] == "Ceftamin", :]

CapomulinList = FinalCapomulin["Tumor Volume (mm3)"]
RamicaneList = FinalRamicane["Tumor Volume (mm3)"]
InfubinolList = FinalInfubinol["Tumor Volume (mm3)"] 
CeftaminList = FinalCeftamin["Tumor Volume (mm3)"]

# Start by getting the last (greatest) timepoint for each mouse
MaxCapo = max(CapomulinList)
MaxRami = max(RamicaneList)
MaxInfu  = max(InfubinolList)
MaxCeft = max(CeftaminList)

# CapomulinList #, RamicaneList, InfubinolList, CeftaminList
# FinalCapomulin

In [None]:
### I know I should have done this in a loop, its been a busy week and I havent had as much time to spend on this. 
# But this works and its only 4 times, so not too ugly code

In [21]:
# For Capomulin 
#If the data is in a dataframe, we use pandas to give quartile calculations
Cap_quartiles = CapomulinList.quantile([.25,.5,.75])
Cap_lowerq = round(Cap_quartiles[0.25],2)
Cap_upperq = round(Cap_quartiles[0.75],2)
Cap_iqr = round(Cap_upperq-Cap_lowerq,2)

print("************************************************")
print("Quartile calculations for Capomulin drug treatment")
print(f"The lower quartile of final tumour volume  is: {Cap_lowerq}")
print(f"The upper quartile of final tumour volume  is: {Cap_upperq}")
print(f"The interquartile range of final tumour volume  is: {Cap_iqr}")
print(f"The median of final tumour volume is: {Cap_quartiles[0.5]:5.2f} ")

Cap_lower_bound = round(Cap_lowerq - (1.5*Cap_iqr),2)
Cap_upper_bound = round(Cap_upperq + (1.5*Cap_iqr),2)
print(f"Values below {Cap_lower_bound} could be outliers.")
print(f"Values above {Cap_upper_bound} could be outliers.")

# Calculate to see if there are any outliers in the Capomulin Final Tumour Volume data
CapOutliers = FinalCapomulin.loc[(FinalCapomulin["Tumor Volume (mm3)"] < Cap_lower_bound) | (FinalCapomulin["Tumor Volume (mm3)"] > Cap_upper_bound)]
if len(CapOutliers) ==0:
    print("No outliers were observed")
else:
    print(CapOutliers["Tumor Volume (mm3)"])


************************************************
Quartile calculations for Capomulin drug treatment
The lower quartile of final tumour volume  is: 32.38
The upper quartile of final tumour volume  is: 40.16
The interquartile range of final tumour volume  is: 7.78
The median of final tumour volume is: 37.31 
Values below 20.71 could be outliers.
Values above 51.83 could be outliers.
No outliers were observed


In [22]:
# For Ceftamin 
#If the data is in a dataframe, we use pandas to give quartile calculations
Ceft_quartiles = CeftaminList.quantile([.25,.5,.75])
Ceft_lowerq = round(Ceft_quartiles[0.25],2)
Ceft_upperq = round(Ceft_quartiles[0.75],2)
Ceft_iqr = round(Ceft_upperq-Ceft_lowerq,2)

print("************************************************")
print("Quartile calculations for Ceftamin drug treatment")
print(f"The lower quartile of final tumour volume  is: {Ceft_lowerq}")
print(f"The upper quartile of final tumour volume  is: {Ceft_upperq}")
print(f"The interquartile range of final tumour volume  is: {Ceft_iqr}")
print(f"The median of final tumour volume is: {Ceft_quartiles[0.5]:5.2f} ")

Ceft_lower_bound = Ceft_lowerq - (1.5*Ceft_iqr)
Ceft_upper_bound = Ceft_upperq + (1.5*Ceft_iqr)
print(f"Values below {Ceft_lower_bound} could be outliers.")
print(f"Values above {Ceft_upper_bound} could be outliers.")

# Calculate to see if there are any outliers in the Capomulin Final Tumour Volume data
CeftOutliers = FinalCeftamin.loc[(FinalCeftamin["Tumor Volume (mm3)"] < Ceft_lower_bound) | (FinalCeftamin["Tumor Volume (mm3)"] > Ceft_upper_bound)]
if len(CeftOutliers) ==0:
    print("No outliers were observed")
else:
    print(CeftOutliers["Tumor Volume (mm3)"])
# CeftOutliers

************************************************
Quartile calculations for Ceftamin drug treatment
The lower quartile of final tumour volume  is: 61.43
The upper quartile of final tumour volume  is: 67.53
The interquartile range of final tumour volume  is: 6.1
The median of final tumour volume is: 64.30 
Values below 52.28 could be outliers.
Values above 76.68 could be outliers.
No outliers were observed


In [23]:
# For Infubinol 
#If the data is in a dataframe, we use pandas to give quartile calculations
Infu_quartiles = InfubinolList.quantile([.25,.5,.75])
Infu_lowerq = round(Infu_quartiles[0.25],2)
Infu_upperq = round(Infu_quartiles[0.75],2)
Infu_iqr = round(Infu_upperq-Infu_lowerq,2)

print("************************************************")
print("Quartile calculations for Infubinol drug treatment")
print(f"The lower quartile of final tumour volume  is: {Infu_lowerq}")
print(f"The upper quartile of final tumour volume  is: {Infu_upperq}")
print(f"The interquartile range of final tumour volume  is: {Infu_iqr}")
print(f"The median of final tumour volume is: {Infu_quartiles[0.5]:5.2f} ")

Infu_lower_bound = Infu_lowerq - (1.5*Infu_iqr)
Infu_upper_bound = Infu_upperq + (1.5*Infu_iqr)
print(f"Values below {Infu_lower_bound} could be outliers.")
print(f"Values above {Infu_upper_bound} could be outliers.")

# Calculate to see if there are any outliers in the Capomulin Final Tumour Volume data
InfuOutliers = FinalInfubinol.loc[(FinalInfubinol["Tumor Volume (mm3)"] < Infu_lower_bound) | (FinalInfubinol["Tumor Volume (mm3)"] > Infu_upper_bound)]
if len(InfuOutliers) ==0:
    print("No outliers were observed")
else:
    print(InfuOutliers["Tumor Volume (mm3)"])

************************************************
Quartile calculations for Infubinol drug treatment
The lower quartile of final tumour volume  is: 62.75
The upper quartile of final tumour volume  is: 67.69
The interquartile range of final tumour volume  is: 4.94
The median of final tumour volume is: 66.08 
Values below 55.34 could be outliers.
Values above 75.1 could be outliers.
No outliers were observed


In [24]:
# For Ramicane 
#If the data is in a dataframe, we use pandas to give quartile calculations
Ram_quartiles = RamicaneList.quantile([.25,.5,.75])
Ram_lowerq = round(Ram_quartiles[0.25],2)
Ram_upperq = round(Ram_quartiles[0.75],2)
Ram_iqr = round(Ram_upperq-Ram_lowerq,2)

print("************************************************")
print("Quartile calculations for Ramicane drug treatment")
print(f"The lower quartile of final tumour volume  is: {Ram_lowerq}")
print(f"The upper quartile of final tumour volume  is: {Ram_upperq}")
print(f"The interquartile range of final tumour volume  is: {Ram_iqr}")
print(f"The median of final tumour volume is: {Ram_quartiles[0.5]:5.2f} ")

Ram_lower_bound = Ram_lowerq - (1.5*Ram_iqr)
Ram_upper_bound = Ram_upperq + (1.5*Ram_iqr)
print(f"Values below {Ram_lower_bound} could be outliers.")
print(f"Values above {Ram_upper_bound} could be outliers.")

# Calculate to see if there are any outliers in the Capomulin Final Tumour Volume data
RamiOutliers = FinalRamicane.loc[(FinalRamicane["Tumor Volume (mm3)"] < Ram_lower_bound) | (FinalRamicane["Tumor Volume (mm3)"] > Ram_upper_bound)]
if len(RamiOutliers) ==0:
    print("No outliers were observed")
else:
    print(RamiOutliers["Tumor Volume (mm3)"])



************************************************
Quartile calculations for Ramicane drug treatment
The lower quartile of final tumour volume  is: 30.98
The upper quartile of final tumour volume  is: 38.51
The interquartile range of final tumour volume  is: 7.53
The median of final tumour volume is: 34.85 
Values below 19.685000000000002 could be outliers.
Values above 49.805 could be outliers.
No outliers were observed


In [28]:
# This is normally at the beginning but I am having the issue of the boxplots plotting on other plots
# so this is a workaround
%matplotlib widget
# Combining the 4 final volume druglists into a single list to be able to plot them on a single chart 
DrugList = ['Capomulin', 'Ceftamin', 'Infubinol', 'Ramicane']
data_to_plot = [CapomulinList, CeftaminList, InfubinolList, RamicaneList]

#plot them on a single chart 
outliers = dict(markerfacecolor = "yellow", markersize = 6)
plt.boxplot(data_to_plot, flierprops = outliers, labels = DrugList)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

{'whiskers': [<matplotlib.lines.Line2D at 0x1c11366ab88>,
  <matplotlib.lines.Line2D at 0x1c11366ac88>,
  <matplotlib.lines.Line2D at 0x1c113678ec8>,
  <matplotlib.lines.Line2D at 0x1c113673ac8>,
  <matplotlib.lines.Line2D at 0x1c11368da08>,
  <matplotlib.lines.Line2D at 0x1c113691a48>,
  <matplotlib.lines.Line2D at 0x1c1136960c8>,
  <matplotlib.lines.Line2D at 0x1c1136a0d08>],
 'caps': [<matplotlib.lines.Line2D at 0x1c113670c08>,
  <matplotlib.lines.Line2D at 0x1c113670d08>,
  <matplotlib.lines.Line2D at 0x1c11367ae88>,
  <matplotlib.lines.Line2D at 0x1c113682fc8>,
  <matplotlib.lines.Line2D at 0x1c113691bc8>,
  <matplotlib.lines.Line2D at 0x1c113696b88>,
  <matplotlib.lines.Line2D at 0x1c11369bf88>,
  <matplotlib.lines.Line2D at 0x1c1136a4408>],
 'boxes': [<matplotlib.lines.Line2D at 0x1c11366a3c8>,
  <matplotlib.lines.Line2D at 0x1c113678d88>,
  <matplotlib.lines.Line2D at 0x1c113678648>,
  <matplotlib.lines.Line2D at 0x1c11368d1c8>],
 'medians': [<matplotlib.lines.Line2D at 0x1c113

In [None]:
# The plots confirm that there are no outliers for the 4 final volume druglists 

## Line and Scatter Plots

In [None]:
# Generate a line plot of time point versus tumor volume for a mouse treated with Capomulin
# Mouse chosen is m957
SingleMouseCap = CleanMerged.loc[CleanMerged["Drug Regimen"] == 'Capomulin', :]
SingleMouseCap = SingleMouseCap.loc[SingleMouseCap['Mouse ID'] == "m957"]
SingleMouseCap

# timepoints = np.arange(0, 50, 5)
SingleMouseCap = SingleMouseCap.loc[:, ["Timepoint", "Tumor Volume (mm3)"]]

SingleMouseCap.set_index('Timepoint').plot(figsize=(10, 8), linewidth=2.5, color='green')


# plt.show()
SingleMouseCap

In [None]:
## Mouse m957's tumour has reduced from 45 mm3 to 33.3 mm3 over the study period, whcih would seem to be a good results.
#Further comparison (in another assignement) should be done to compare to the other drugs performances 


In [None]:
timepoints

In [None]:
FinalCapomulin

In [29]:
%matplotlib widget
import matplotlib.pyplot as plt

AvgCapVol = CleanMerged.loc[CleanMerged["Drug Regimen"] == 'Capomulin', :]
# AvgCapVol = AvgCapVol.groupby([AvgCapVol['Mouse ID']).mean()
# SingleMouseCap

AvgCapVol = pd.DataFrame(AvgCapVol.groupby(["Mouse ID", 'Weight (g)'])["Tumor Volume (mm3)"].mean()).reset_index()

AvgCapVol
# Generate a scatter plot of mouse weight versus average tumor volume for the Capomulin regimen


Vol = AvgCapVol['Tumor Volume (mm3)']#.tolist()
Weights = AvgCapVol['Weight (g)']#.tolist()

# Vol
# # WeightVsVolume.set_index('Timepoint').plot(figsize=(10, 8), linewidth=2.5, color='green')
plt.scatter(Weights, Vol, marker="o")
plt.title("Mouse Weight vs Tumor Volume")


# plt.show()
# # WeightVsVolume


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 1.0, 'Mouse Weight vs Tumor Volume')

## Correlation and Regression

In [30]:
# Calculate the correlation coefficient and linear regression model 
# for mouse weight and average tumor volume for the Capomulin regimen
x_values = Weights
y_values = Vol
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(20,36),fontsize=15,color="red")

plt.xlabel('Weight (g)')
plt.ylabel('Tumor Volume (mm3)')


Text(70.45833333333334, 0.5, 'Tumor Volume (mm3)')

## There is a strong regression between mouse weight and tumour volume suggesting that there is a link between heavier mice and larger tumours.