In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from scipy.stats import chi2_contingency, norm
from data_viz import *

In [2]:
data = pd.read_csv("data/animal_center.csv")
data

Unnamed: 0,animal_id,name,animal_type,sex_intake,sex_outcome,breed,color,date_of_birth,found_location,intake_datetime,outcome_datetime,intake_type,intake_condition,outcome_type,outcome_subtype,age_upon_intake(years),age_upon_outcome(years),duration(days)
0,A006100,Scamp,Dog,Neutered Male,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,8700 Research in Austin (TX),2014-03-07 14:26:00,2014-03-08 17:10:00,Public Assist,Normal,Return to Owner,,6.7,6.7,1.0
1,A006100,Scamp,Dog,Neutered Male,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,8700 Research Blvd in Austin (TX),2014-12-19 10:21:00,2014-12-20 16:35:00,Public Assist,Normal,Return to Owner,,7.4,7.4,1.0
2,A006100,Scamp,Dog,Neutered Male,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,Colony Creek And Hunters Trace in Austin (TX),2017-12-07 14:07:00,2017-12-07 00:00:00,Stray,Normal,Return to Owner,,10.4,10.4,-1.0
3,A047759,Oreo,Dog,Neutered Male,Neutered Male,Dachshund,Tricolor,2004-04-02,Austin (TX),2014-04-02 15:55:00,2014-04-07 15:12:00,Owner Surrender,Normal,Transfer,Partner,10.0,10.0,4.0
4,A134067,Bandit,Dog,Neutered Male,Neutered Male,Shetland Sheepdog,Brown/White,1997-10-16,12034 Research Blvd in Austin (TX),2013-11-16 09:02:00,2013-11-16 11:54:00,Public Assist,Injured,Return to Owner,,16.1,16.1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157585,A894036,,Dog,Intact Female,,Miniature Pinscher Mix,Black/Tan,,Martin Luther King Jr Blvd And M Sta Ln in Aus...,2023-12-01 17:33:00,,Stray,Normal,,,,,
157586,A894054,Dawn,Dog,Intact Female,Intact Female,Chihuahua Shorthair,Tan,2022-12-02,2001 Ponciana Loop in Austin (TX),2023-12-02 09:52:00,2023-12-02 13:03:00,Stray,Normal,Rto-Adopt,,1.0,1.0,0.0
157587,A894063,Logan,Cat,Neutered Male,,Domestic Shorthair,Brown Tabby/White,,8585 Scotland Well Drive in Austin (TX),2023-12-02 12:49:00,,Stray,Normal,,,,,
157588,A894067,Maverick,Dog,Intact Male,,Siberian Husky,Black/White,,124 West Anderson Lane in Austin (TX),2023-12-02 13:53:00,,Abandoned,Normal,,,,,


# **Hypothesis1:**
Is there a correlation between the neuter/spay rate of animals and their age?

$H0$: There is no significant relationship between neuter/spay rate and age in animals.\
$H1$: There is a significant relationship between neuter/spay rate and age in animals.

In [3]:
data.columns

Index(['animal_id', 'name', 'animal_type', 'sex_intake', 'sex_outcome',
       'breed', 'color', 'date_of_birth', 'found_location', 'intake_datetime',
       'outcome_datetime', 'intake_type', 'intake_condition', 'outcome_type',
       'outcome_subtype', 'age_upon_intake(years)', 'age_upon_outcome(years)',
       'duration(days)'],
      dtype='object')

In [4]:
data.outcome_type.value_counts()

Adoption           73848
Transfer           44898
Return to Owner    24425
Euthanasia          9964
Died                1468
Rto-Adopt           1079
Disposal             766
Missing               84
Relocate              26
Stolen                 5
Name: outcome_type, dtype: int64

In [5]:
alive_outcome_type = ["Euthanasia", "Died", "Disposal", "Missing", "Stolen"]
sex_change = data[(data["animal_type"].isin(["Dog", "Cat"])) & 
                  (data["sex_intake"].isin(["Intact Female", "Intact Male"])) &
                  (~data["outcome_type"].isin(alive_outcome_type))]\
    [["animal_id", "animal_type", "sex_intake", "sex_outcome", "age_upon_outcome(years)", "outcome_datetime", "duration(days)", "outcome_type"]].reset_index(drop=True)
sex_change

Unnamed: 0,animal_id,animal_type,sex_intake,sex_outcome,age_upon_outcome(years),outcome_datetime,duration(days),outcome_type
0,A163459,Dog,Intact Female,Intact Female,15.1,2014-11-14 19:28:00,0.0,Return to Owner
1,A191351,Cat,Intact Female,Intact Female,16.2,2015-11-17 13:29:00,3.0,Return to Owner
2,A212672,Dog,Intact Female,Intact Female,13.8,2013-12-06 14:34:00,10.0,Return to Owner
3,A256412,Dog,Intact Male,Intact Male,16.6,2013-10-06 14:26:00,0.0,Return to Owner
4,A309829,Dog,Intact Male,Intact Male,13.0,2014-11-12 15:34:00,2.0,Return to Owner
...,...,...,...,...,...,...,...,...
98249,A894030,Cat,Intact Male,,,,,
98250,A894031,Dog,Intact Male,,,,,
98251,A894036,Dog,Intact Female,,,,,
98252,A894054,Dog,Intact Female,Intact Female,1.0,2023-12-02 13:03:00,0.0,Rto-Adopt


In [6]:
sex_change["neutered_spayed_or_not"] = np.where(sex_change["sex_outcome"].isin(["Neutered Male", "Spayed Female"]), 1, 0)
sex_change

Unnamed: 0,animal_id,animal_type,sex_intake,sex_outcome,age_upon_outcome(years),outcome_datetime,duration(days),outcome_type,neutered_spayed_or_not
0,A163459,Dog,Intact Female,Intact Female,15.1,2014-11-14 19:28:00,0.0,Return to Owner,0
1,A191351,Cat,Intact Female,Intact Female,16.2,2015-11-17 13:29:00,3.0,Return to Owner,0
2,A212672,Dog,Intact Female,Intact Female,13.8,2013-12-06 14:34:00,10.0,Return to Owner,0
3,A256412,Dog,Intact Male,Intact Male,16.6,2013-10-06 14:26:00,0.0,Return to Owner,0
4,A309829,Dog,Intact Male,Intact Male,13.0,2014-11-12 15:34:00,2.0,Return to Owner,0
...,...,...,...,...,...,...,...,...,...
98249,A894030,Cat,Intact Male,,,,,,0
98250,A894031,Dog,Intact Male,,,,,,0
98251,A894036,Dog,Intact Female,,,,,,0
98252,A894054,Dog,Intact Female,Intact Female,1.0,2023-12-02 13:03:00,0.0,Rto-Adopt,0


In [7]:
neutered_n_spayed_count = sex_change.neutered_spayed_or_not.sum()
animal_count = sex_change.animal_id.count()
neutered_n_spayed_pct = neutered_n_spayed_count/animal_count

print(f"{round(neutered_n_spayed_pct, 3)*100.0}% of the animals that were intact before have been neutered/spayed upon outcome.")

63.5% of the animals that were intact before have been neutered/spayed upon outcome.


In [8]:
sex_change["age_group"] = pd.cut(sex_change["age_upon_outcome(years)"],
                                 bins=[0, 2, 7, float('inf')],
                                 labels=["Young", "Adult", "Senior"])

sex_change

Unnamed: 0,animal_id,animal_type,sex_intake,sex_outcome,age_upon_outcome(years),outcome_datetime,duration(days),outcome_type,neutered_spayed_or_not,age_group
0,A163459,Dog,Intact Female,Intact Female,15.1,2014-11-14 19:28:00,0.0,Return to Owner,0,Senior
1,A191351,Cat,Intact Female,Intact Female,16.2,2015-11-17 13:29:00,3.0,Return to Owner,0,Senior
2,A212672,Dog,Intact Female,Intact Female,13.8,2013-12-06 14:34:00,10.0,Return to Owner,0,Senior
3,A256412,Dog,Intact Male,Intact Male,16.6,2013-10-06 14:26:00,0.0,Return to Owner,0,Senior
4,A309829,Dog,Intact Male,Intact Male,13.0,2014-11-12 15:34:00,2.0,Return to Owner,0,Senior
...,...,...,...,...,...,...,...,...,...,...
98249,A894030,Cat,Intact Male,,,,,,0,
98250,A894031,Dog,Intact Male,,,,,,0,
98251,A894036,Dog,Intact Female,,,,,,0,
98252,A894054,Dog,Intact Female,Intact Female,1.0,2023-12-02 13:03:00,0.0,Rto-Adopt,0,Young


In [9]:
neuter_spay_age_contingency_table = pd.crosstab(sex_change["age_group"], sex_change["neutered_spayed_or_not"])
neuter_spay_age_contingency_table

neutered_spayed_or_not,0,1
age_group,Unnamed: 1_level_1,Unnamed: 2_level_1
Young,25266,52544
Adult,4358,9115
Senior,1572,726


In [10]:
age_groups = ["Young", "Adult", "Senior"]

for age_group in age_groups:
    total = neuter_spay_age_contingency_table.loc[age_group].sum()
    neuter_spay_count = neuter_spay_age_contingency_table.loc[age_group, 1]
    neuter_spay_rate = round(neuter_spay_count/total, 4)*100.0
    print(f"The neuter/spay rate for {age_group.lower()} dogs/cats is {neuter_spay_rate}%")

The neuter/spay rate for young dogs/cats is 67.53%
The neuter/spay rate for adult dogs/cats is 67.65%
The neuter/spay rate for senior dogs/cats is 31.59%


In [11]:
chi2, p, dof, expected = chi2_contingency(neuter_spay_age_contingency_table)
print(f"chi2:    {chi2}")
print(f"p-value: {p}")
print(f"dof:     {dof}")
print(f"expected:\n{expected}")

chi2:    1304.0052860032565
p-value: 6.900013779848828e-284
dof:     2
expected:
[[25938.60676847 51871.39323153]
 [ 4491.33593358  8981.66406642]
 [  766.05729796  1531.94270204]]


In [12]:
chi2, p, dof, expected = chi2_contingency(neuter_spay_age_contingency_table)

# set significance level = 0.05
alpha = 0.05
if p < alpha:
    print("There is a significant relationship between age and neutered/spayed status.")
else:
    print("There is no significant relationship between age and neutered/spayed status.")

There is a significant relationship between age and neutered/spayed status.
