# Pymaceuticals Inc.
___

**Analysis Summary - Key Statistics**

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

In [2]:
# Relative locations of the two CSV inputs used by this notebook:
# the study results and the mouse metadata.
study_results_path = "../data/Study_results.csv"
mouse_metadata_path = "../data/Mouse_metadata.csv"

In [3]:
# Load both CSV files into DataFrames (the two reads are independent).
study_results = pd.read_csv(study_results_path)
mouse_metadata = pd.read_csv(mouse_metadata_path)

In [4]:
# Left-join the mouse metadata onto the study results using the shared
# "Mouse ID" column, so every study-results row is kept.
combined_mouse_data = study_results.merge(mouse_metadata, how="left", on="Mouse ID")
# Preview the first rows of the merged table
combined_mouse_data.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.0,0,Capomulin,Female,9,22
1,f932,0,45.0,0,Ketapril,Male,15,29
2,g107,0,45.0,0,Ketapril,Female,2,29
3,a457,0,45.0,0,Ketapril,Female,11,30
4,c819,0,45.0,0,Ketapril,Male,21,25


In [5]:
# Count the distinct Mouse IDs present in the merged (not yet cleaned) data.
Num_Mice = combined_mouse_data.loc[:, "Mouse ID"].nunique()
Num_Mice

249

In [6]:
# Select every row whose (Mouse ID, Timepoint) pair occurs more than once.
# keep=False flags all occurrences of a repeated pair, not just the later ones.
duplicate_mice = combined_mouse_data.loc[
    lambda frame: frame.duplicated(subset=["Mouse ID", "Timepoint"], keep=False)
]

# Distinct Mouse IDs found among the flagged rows
duplicate_mice_ids = duplicate_mice["Mouse ID"].unique()

# Show which Mouse IDs are affected
duplicate_mice_ids

array(['g989'], dtype=object)

In [7]:
# Optional: display the rows flagged as duplicates, i.e. the rows that share a
# (Mouse ID, Timepoint) pair with at least one other row.
# NOTE(review): this shows only the flagged rows, not necessarily every row
# recorded for that Mouse ID.
duplicate_mice

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
107,g989,0,45.0,0,Propriva,Female,21,26
137,g989,0,45.0,0,Propriva,Female,21,26
329,g989,5,48.786801,0,Propriva,Female,21,26
360,g989,5,47.570392,0,Propriva,Female,21,26
620,g989,10,51.745156,0,Propriva,Female,21,26
681,g989,10,49.880528,0,Propriva,Female,21,26
815,g989,15,51.325852,1,Propriva,Female,21,26
869,g989,15,53.44202,0,Propriva,Female,21,26
950,g989,20,55.326122,1,Propriva,Female,21,26
1111,g989,20,54.65765,1,Propriva,Female,21,26


In [20]:
# Create a clean DataFrame by dropping the duplicate mouse by its ID.
# Filtering on Mouse ID removes every row belonging to a mouse listed in
# duplicate_mice_ids, not only the rows that were themselves duplicated.
is_duplicate_mouse = combined_mouse_data["Mouse ID"].isin(duplicate_mice_ids)
# `~` negates the boolean mask element-wise (idiomatic replacement for `== False`).
combined_mouse_data_cleaned = combined_mouse_data[~is_duplicate_mouse]
combined_mouse_data_cleaned.head()

Unnamed: 0,Mouse ID,Timepoint,Tumor Volume (mm3),Metastatic Sites,Drug Regimen,Sex,Age_months,Weight (g)
0,b128,0,45.0,0,Capomulin,Female,9,22
1,f932,0,45.0,0,Ketapril,Male,15,29
2,g107,0,45.0,0,Ketapril,Female,2,29
3,a457,0,45.0,0,Ketapril,Female,11,30
4,c819,0,45.0,0,Ketapril,Male,21,25


In [24]:
# Recount the distinct Mouse IDs, this time in the cleaned DataFrame.
# Note: this rebinds Num_Mice, replacing the count taken from the raw data.
Num_Mice = combined_mouse_data_cleaned.loc[:, "Mouse ID"].nunique()
Num_Mice

248