In [1]:
%matplotlib notebook

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np

# Study data files
mouse_metadata_path = "../resources/Mouse_metadata.csv"
study_results_path = "../resources/Study_results.csv"

# Read the mouse data and the study results
mouse_metadata = pd.read_csv(mouse_metadata_path)
study_results = pd.read_csv(study_results_path)

# Combine the data into a single dataset

merged_df = pd.merge(mouse_metadata, study_results, on="Mouse ID", how="outer")

# Display the data table for preview
merged_df

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.000000,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1
...,...,...,...,...,...,...,...,...
1888,z969,Naftisol,Male,9,30,25,63.145652,2
1889,z969,Naftisol,Male,9,30,30,65.841013,3
1890,z969,Naftisol,Male,9,30,35,69.176246,4
1891,z969,Naftisol,Male,9,30,40,70.314904,4


In [3]:
# Checking the number of mice.
unique_mice = merged_df["Mouse ID"].unique()
unique_mice
number_unique_mice = len(unique_mice)
number_unique_mice

249

In [4]:
# Getting the duplicate mice by ID number that shows up for Mouse ID and Timepoint. 
merged_df.drop_duplicates()

Unnamed: 0,Mouse ID,Drug Regimen,Sex,Age_months,Weight (g),Timepoint,Tumor Volume (mm3),Metastatic Sites
0,k403,Ramicane,Male,21,16,0,45.000000,0
1,k403,Ramicane,Male,21,16,5,38.825898,0
2,k403,Ramicane,Male,21,16,10,35.014271,1
3,k403,Ramicane,Male,21,16,15,34.223992,1
4,k403,Ramicane,Male,21,16,20,32.997729,1
...,...,...,...,...,...,...,...,...
1888,z969,Naftisol,Male,9,30,25,63.145652,2
1889,z969,Naftisol,Male,9,30,30,65.841013,3
1890,z969,Naftisol,Male,9,30,35,69.176246,4
1891,z969,Naftisol,Male,9,30,40,70.314904,4


In [5]:
# Create a clean DataFrame by dropping the duplicate mouse by its ID.
clean_merged_df = merged_df.drop_duplicates()

In [6]:
# Checking the number of mice in the clean DataFrame.
unique_mice = clean_merged_df["Mouse ID"].unique()
unique_mice
number_unique_mice = len(unique_mice)
number_unique_mice

249

In [7]:
# Checking the number of mice in the clean DataFrame.
num_mice_clean = len(clean_merged_df['Mouse ID'])
num_mice_clean

1892

In [14]:
# Generate a pie plot showing the distribution of female versus male mice using pandas

mouse_drug_df = pd.DataFrame({"Drug Regimen": clean_merged_df["Drug Regimen"],
                             "Mouse ID": clean_merged_df["Mouse ID"]})
drop_mouse_df = mouse_drug_df.drop_duplicates()
group_drop_mouse_drug_df = drop_mouse_df.groupby('Drug Regimen')
group_drop_mouse_drug = group_drop_mouse_drug_df.count()

x_axis = np.arange(len(group_drop_mouse_drug))

plt.bar(x_axis, group_drop_mouse_drug['Mouse ID'], color='r', alpha=0.5, align="center")
plt.show()
