In [1]:
import numpy as np
from scipy.stats import chi2_contingency, norm
from data_viz import *
import plotly.express as px

In [2]:
data = pd.read_csv("data/animal_center.csv")
data

Unnamed: 0,animal_id,name,animal_type,sex_intake,sex_outcome,breed,color,date_of_birth,found_location,intake_datetime,outcome_datetime,intake_type,intake_condition,outcome_type,outcome_subtype,age_upon_intake(years),age_upon_outcome(years),duration(days)
0,A006100,Scamp,Dog,Neutered Male,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,8700 Research in Austin (TX),2014-03-07 14:26:00,2014-03-08 17:10:00,Public Assist,Normal,Return to Owner,,6.7,6.7,1.0
1,A006100,Scamp,Dog,Neutered Male,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,8700 Research Blvd in Austin (TX),2014-12-19 10:21:00,2014-12-20 16:35:00,Public Assist,Normal,Return to Owner,,7.4,7.4,1.0
2,A006100,Scamp,Dog,Neutered Male,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,Colony Creek And Hunters Trace in Austin (TX),2017-12-07 14:07:00,2017-12-07 00:00:00,Stray,Normal,Return to Owner,,10.4,10.4,-1.0
3,A047759,Oreo,Dog,Neutered Male,Neutered Male,Dachshund,Tricolor,2004-04-02,Austin (TX),2014-04-02 15:55:00,2014-04-07 15:12:00,Owner Surrender,Normal,Transfer,Partner,10.0,10.0,4.0
4,A134067,Bandit,Dog,Neutered Male,Neutered Male,Shetland Sheepdog,Brown/White,1997-10-16,12034 Research Blvd in Austin (TX),2013-11-16 09:02:00,2013-11-16 11:54:00,Public Assist,Injured,Return to Owner,,16.1,16.1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157648,A894260,Sassy,Cat,Intact Female,,Domestic Shorthair,Torbie,,Austin (TX),2023-12-05 16:56:00,,Owner Surrender,Normal,,,,,
157649,A894262,Diamond,Cat,Intact Female,,Domestic Shorthair,Torbie,,Austin (TX),2023-12-05 16:56:00,,Owner Surrender,Normal,,,,,
157650,A894263,Ruby,Cat,Intact Female,,Domestic Shorthair,Tortie,,Austin (TX),2023-12-05 16:56:00,,Owner Surrender,Normal,,,,,
157651,A894265,Gizmo,Cat,Intact Male,,Domestic Shorthair,Orange Tabby,,Austin (TX),2023-12-05 16:56:00,,Owner Surrender,Normal,,,,,


# **Hypothesis1:**
Is there a correlation between the neuter/spay rate of animals and their age?

$H0$: There is no significant relationship between neuter/spay rate and age in animals.\
$H1$: There is a significant relationship between neuter/spay rate and age in animals.

In [3]:
data.columns

Index(['animal_id', 'name', 'animal_type', 'sex_intake', 'sex_outcome',
       'breed', 'color', 'date_of_birth', 'found_location', 'intake_datetime',
       'outcome_datetime', 'intake_type', 'intake_condition', 'outcome_type',
       'outcome_subtype', 'age_upon_intake(years)', 'age_upon_outcome(years)',
       'duration(days)'],
      dtype='object')

In [4]:
data.outcome_type.value_counts()

Adoption           73893
Transfer           44908
Return to Owner    24431
Euthanasia          9965
Died                1470
Rto-Adopt           1079
Disposal             767
Missing               84
Relocate              26
Stolen                 5
Name: outcome_type, dtype: int64

In [5]:
alive_outcome_type = ["Euthanasia", "Died", "Disposal", "Missing", "Stolen"]
sex_change = data[(data["animal_type"].isin(["Dog", "Cat"])) & 
                  (data["sex_intake"].isin(["Intact Female", "Intact Male"])) &
                  (~data["outcome_type"].isin(alive_outcome_type))]\
    [["animal_id", "animal_type", "sex_intake", "sex_outcome", "age_upon_outcome(years)", "outcome_datetime", "duration(days)", "outcome_type"]].reset_index(drop=True)
sex_change

Unnamed: 0,animal_id,animal_type,sex_intake,sex_outcome,age_upon_outcome(years),outcome_datetime,duration(days),outcome_type
0,A163459,Dog,Intact Female,Intact Female,15.1,2014-11-14 19:28:00,0.0,Return to Owner
1,A191351,Cat,Intact Female,Intact Female,16.2,2015-11-17 13:29:00,3.0,Return to Owner
2,A212672,Dog,Intact Female,Intact Female,13.8,2013-12-06 14:34:00,10.0,Return to Owner
3,A256412,Dog,Intact Male,Intact Male,16.6,2013-10-06 14:26:00,0.0,Return to Owner
4,A309829,Dog,Intact Male,Intact Male,13.0,2014-11-12 15:34:00,2.0,Return to Owner
...,...,...,...,...,...,...,...,...
98300,A894260,Cat,Intact Female,,,,,
98301,A894262,Cat,Intact Female,,,,,
98302,A894263,Cat,Intact Female,,,,,
98303,A894265,Cat,Intact Male,,,,,


In [6]:
sex_change["neutered_spayed_or_not"] = np.where(sex_change["sex_outcome"].isin(["Neutered Male", "Spayed Female"]), 1, 0)
sex_change

Unnamed: 0,animal_id,animal_type,sex_intake,sex_outcome,age_upon_outcome(years),outcome_datetime,duration(days),outcome_type,neutered_spayed_or_not
0,A163459,Dog,Intact Female,Intact Female,15.1,2014-11-14 19:28:00,0.0,Return to Owner,0
1,A191351,Cat,Intact Female,Intact Female,16.2,2015-11-17 13:29:00,3.0,Return to Owner,0
2,A212672,Dog,Intact Female,Intact Female,13.8,2013-12-06 14:34:00,10.0,Return to Owner,0
3,A256412,Dog,Intact Male,Intact Male,16.6,2013-10-06 14:26:00,0.0,Return to Owner,0
4,A309829,Dog,Intact Male,Intact Male,13.0,2014-11-12 15:34:00,2.0,Return to Owner,0
...,...,...,...,...,...,...,...,...,...
98300,A894260,Cat,Intact Female,,,,,,0
98301,A894262,Cat,Intact Female,,,,,,0
98302,A894263,Cat,Intact Female,,,,,,0
98303,A894265,Cat,Intact Male,,,,,,0


In [7]:
neutered_n_spayed_count = sex_change.neutered_spayed_or_not.sum()
animal_count = sex_change.animal_id.count()
neutered_n_spayed_pct = neutered_n_spayed_count/animal_count

print(f"{round(neutered_n_spayed_pct, 3)*100.0}% of the animals that were intact before have been neutered/spayed upon outcome.")

63.5% of the animals that were intact before have been neutered/spayed upon outcome.


In [8]:
sex_change["age_group"] = pd.cut(sex_change["age_upon_outcome(years)"],
                                 bins=[-0.01, 2, 7, float('inf')],
                                 labels=["Young", "Adult", "Senior"])

sex_change

Unnamed: 0,animal_id,animal_type,sex_intake,sex_outcome,age_upon_outcome(years),outcome_datetime,duration(days),outcome_type,neutered_spayed_or_not,age_group
0,A163459,Dog,Intact Female,Intact Female,15.1,2014-11-14 19:28:00,0.0,Return to Owner,0,Senior
1,A191351,Cat,Intact Female,Intact Female,16.2,2015-11-17 13:29:00,3.0,Return to Owner,0,Senior
2,A212672,Dog,Intact Female,Intact Female,13.8,2013-12-06 14:34:00,10.0,Return to Owner,0,Senior
3,A256412,Dog,Intact Male,Intact Male,16.6,2013-10-06 14:26:00,0.0,Return to Owner,0,Senior
4,A309829,Dog,Intact Male,Intact Male,13.0,2014-11-12 15:34:00,2.0,Return to Owner,0,Senior
...,...,...,...,...,...,...,...,...,...,...
98300,A894260,Cat,Intact Female,,,,,,0,
98301,A894262,Cat,Intact Female,,,,,,0,
98302,A894263,Cat,Intact Female,,,,,,0,
98303,A894265,Cat,Intact Male,,,,,,0,


In [9]:
neuter_spay_age_contingency_table = pd.crosstab(sex_change["age_group"], sex_change["neutered_spayed_or_not"])
neuter_spay_age_contingency_table

neutered_spayed_or_not,0,1
age_group,Unnamed: 1_level_1,Unnamed: 2_level_1
Young,29146,52577
Adult,4359,9120
Senior,1572,727


In [10]:
age_groups = ["Young", "Adult", "Senior"]

for age_group in age_groups:
    total = neuter_spay_age_contingency_table.loc[age_group].sum()
    neuter_spay_count = neuter_spay_age_contingency_table.loc[age_group, 1]
    neuter_spay_rate = round(neuter_spay_count/total, 4)*100.0
    print(f"The neuter/spay rate for {age_group.lower()} dogs/cats is {neuter_spay_rate}%")

The neuter/spay rate for young dogs/cats is 64.34%
The neuter/spay rate for adult dogs/cats is 67.66%
The neuter/spay rate for senior dogs/cats is 31.619999999999997%


In [11]:
chi2, p, dof, expected = chi2_contingency(neuter_spay_age_contingency_table)
print(f"chi2:    {chi2}")
print(f"p-value: {p}")
print(f"dof:     {dof}")
print(f"expected:\n{expected}")

chi2:    1128.7334319508877
p-value: 7.918619626805163e-246
dof:     2
expected:
[[29400.7002082  52322.2997918 ]
 [ 4849.21060297  8629.78939703]
 [  827.08918883  1471.91081117]]


In [12]:
chi2, p, dof, expected = chi2_contingency(neuter_spay_age_contingency_table)

# set significance level = 0.05
alpha = 0.05
if p < alpha:
    print("There is a significant relationship between age and neutered/spayed status.")
else:
    print("There is no significant relationship between age and neutered/spayed status.")

There is a significant relationship between age and neutered/spayed status.


In [13]:
fig = px.histogram(sex_change,
                   x="age_group",
                   color="neutered_spayed_or_not",
                   barmode="group",
                   title="Neutered/Spayed Cases by Age Group")

fig.update_layout(title_x=0.5,
                  xaxis=dict(title="Age Group"),
                  yaxis=dict(title="Counts"),
                  legend_title_text="Neutered/Spayed Status",
                  height=600,
                  width=1000)

fig.update_layout(legend=dict(orientation="v",
                              yanchor="bottom",
                              y=1.02,
                              xanchor="right",
                              x=1))

fig.show()

## Age VS. Neuter/Spay Status

In [22]:
age_neuter_spay_research = sex_change[(~sex_change["age_upon_outcome(years)"].isna()) &
                                      (sex_change["age_upon_outcome(years)"] >= 0)]\
                                        [["age_upon_outcome(years)", "neutered_spayed_or_not"]].reset_index()
age_neuter_spay_research

Unnamed: 0,index,age_upon_outcome(years),neutered_spayed_or_not
0,0,15.1,0
1,1,16.2,0
2,2,13.8,0
3,3,16.6,0
4,4,13.0,0
...,...,...,...
97496,98226,0.6,0
97497,98231,12.9,0
97498,98256,1.0,0
97499,98260,0.9,0


In [23]:
group = age_neuter_spay_research.groupby("neutered_spayed_or_not")
group["age_upon_outcome(years)"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
neutered_spayed_or_not,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,35077.0,1.475967,2.44424,0.0,0.1,0.5,2.0,20.0
1,62424.0,1.128138,1.464125,-0.0,0.2,0.5,1.5,17.1


In [25]:
# box plot
fig = px.box(age_neuter_spay_research,
             x="neutered_spayed_or_not",
             y="age_upon_outcome(years)",
             points="all")

fig.update_layout(
    xaxis=dict(title="Neutered/Spayed"),
    yaxis=dict(title="Age"),
    title="Distribution of Age by Neutered/Spayed Status",
    title_x=0.5)

fig.show()