In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from scipy.stats import chi2_contingency, norm
from data_viz import *

In [2]:
# Load the animal-center records; the path is relative to the notebook's
# working directory.
animal_center_csv = "data/animal_center.csv"
data = pd.read_csv(animal_center_csv)
data

Unnamed: 0,animal_id,name,animal_type,sex_intake,sex_outcome,breed,color,date_of_birth,found_location,intake_datetime,outcome_datetime,intake_type,intake_condition,outcome_type,outcome_subtype,age_upon_intake(years),age_upon_outcome(years),duration(days)
0,A006100,Scamp,Dog,Neutered Male,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,8700 Research in Austin (TX),2014-03-07 14:26:00,2014-03-08 17:10:00,Public Assist,Normal,Return to Owner,,6.7,6.7,1.0
1,A006100,Scamp,Dog,Neutered Male,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,8700 Research Blvd in Austin (TX),2014-12-19 10:21:00,2014-12-20 16:35:00,Public Assist,Normal,Return to Owner,,7.4,7.4,1.0
2,A006100,Scamp,Dog,Neutered Male,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,Colony Creek And Hunters Trace in Austin (TX),2017-12-07 14:07:00,2017-12-07 00:00:00,Stray,Normal,Return to Owner,,10.4,10.4,-1.0
3,A047759,Oreo,Dog,Neutered Male,Neutered Male,Dachshund,Tricolor,2004-04-02,Austin (TX),2014-04-02 15:55:00,2014-04-07 15:12:00,Owner Surrender,Normal,Transfer,Partner,10.0,10.0,4.0
4,A134067,Bandit,Dog,Neutered Male,Neutered Male,Shetland Sheepdog,Brown/White,1997-10-16,12034 Research Blvd in Austin (TX),2013-11-16 09:02:00,2013-11-16 11:54:00,Public Assist,Injured,Return to Owner,,16.1,16.1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157648,A894260,Sassy,Cat,Intact Female,,Domestic Shorthair,Torbie,,Austin (TX),2023-12-05 16:56:00,,Owner Surrender,Normal,,,,,
157649,A894262,Diamond,Cat,Intact Female,,Domestic Shorthair,Torbie,,Austin (TX),2023-12-05 16:56:00,,Owner Surrender,Normal,,,,,
157650,A894263,Ruby,Cat,Intact Female,,Domestic Shorthair,Tortie,,Austin (TX),2023-12-05 16:56:00,,Owner Surrender,Normal,,,,,
157651,A894265,Gizmo,Cat,Intact Male,,Domestic Shorthair,Orange Tabby,,Austin (TX),2023-12-05 16:56:00,,Owner Surrender,Normal,,,,,


# **Hypothesis 2:**
Has there been a significant change in the adoption rate of animals before and after the onset of COVID-19?

$H_0$: There is no significant difference in the adoption rate of animals before and after the onset of COVID.\
$H_1$: There is a significant increase in the adoption rate of animals after the onset of COVID compared to before.

In [3]:
# Keep only the columns needed for the adoption analysis and discard rows
# that have no recorded outcome time.
adoption_columns = ["animal_id", "outcome_datetime", "outcome_type"]
adoption_research = (
    data[adoption_columns]
    .reset_index(drop=True)
    .dropna(subset=["outcome_datetime"])
)
adoption_research

Unnamed: 0,animal_id,outcome_datetime,outcome_type
0,A006100,2014-03-08 17:10:00,Return to Owner
1,A006100,2014-12-20 16:35:00,Return to Owner
2,A006100,2017-12-07 00:00:00,Return to Owner
3,A047759,2014-04-07 15:12:00,Transfer
4,A134067,2013-11-16 11:54:00,Return to Owner
...,...,...,...
157571,A894012,2023-12-01 16:00:00,Transfer
157591,A894054,2023-12-02 13:03:00,Rto-Adopt
157598,A894102,2023-12-05 15:28:00,Return to Owner
157619,A894181,2023-12-04 18:13:00,Return to Owner


In [4]:
# Label each outcome: only an outcome_type of exactly "Adoption" counts as
# "Adopted"; every other value is labelled "Not Adopted".
is_adoption = adoption_research["outcome_type"] == "Adoption"
adoption_research = adoption_research.assign(
    adoption_status=np.where(is_adoption, "Adopted", "Not Adopted")
)
adoption_research

Unnamed: 0,animal_id,outcome_datetime,outcome_type,adoption_status
0,A006100,2014-03-08 17:10:00,Return to Owner,Not Adopted
1,A006100,2014-12-20 16:35:00,Return to Owner,Not Adopted
2,A006100,2017-12-07 00:00:00,Return to Owner,Not Adopted
3,A047759,2014-04-07 15:12:00,Transfer,Not Adopted
4,A134067,2013-11-16 11:54:00,Return to Owner,Not Adopted
...,...,...,...,...
157571,A894012,2023-12-01 16:00:00,Transfer,Not Adopted
157591,A894054,2023-12-02 13:03:00,Rto-Adopt,Not Adopted
157598,A894102,2023-12-05 15:28:00,Return to Owner,Not Adopted
157619,A894181,2023-12-04 18:13:00,Return to Owner,Not Adopted


In [5]:
# Outcomes strictly before this date are labelled "pre-covid"; outcomes on or
# after it are labelled "post-covid".
datetime_threshold = "2020-01-01"

# Compare real timestamps rather than raw strings: a string comparison is only
# correct while every value is zero-padded ISO-8601 text, whereas parsing makes
# the ordering chronological. The stored outcome_datetime column is left
# untouched.
outcome_times = pd.to_datetime(adoption_research["outcome_datetime"])
adoption_research["before_or_after_covid"] = np.where(
    outcome_times < pd.Timestamp(datetime_threshold),
    "pre-covid",
    "post-covid",
)
adoption_research

Unnamed: 0,animal_id,outcome_datetime,outcome_type,adoption_status,before_or_after_covid
0,A006100,2014-03-08 17:10:00,Return to Owner,Not Adopted,pre-covid
1,A006100,2014-12-20 16:35:00,Return to Owner,Not Adopted,pre-covid
2,A006100,2017-12-07 00:00:00,Return to Owner,Not Adopted,pre-covid
3,A047759,2014-04-07 15:12:00,Transfer,Not Adopted,pre-covid
4,A134067,2013-11-16 11:54:00,Return to Owner,Not Adopted,pre-covid
...,...,...,...,...,...
157571,A894012,2023-12-01 16:00:00,Transfer,Not Adopted,post-covid
157591,A894054,2023-12-02 13:03:00,Rto-Adopt,Not Adopted,post-covid
157598,A894102,2023-12-05 15:28:00,Return to Owner,Not Adopted,post-covid
157619,A894181,2023-12-04 18:13:00,Return to Owner,Not Adopted,post-covid


In [6]:
# Count outcomes per period (rows) and adoption status (columns).
adoption_contingency_table = pd.crosstab(
    index=adoption_research["before_or_after_covid"],
    columns=adoption_research["adoption_status"],
)
adoption_contingency_table

adoption_status,Adopted,Not Adopted
before_or_after_covid,Unnamed: 1_level_1,Unnamed: 2_level_1
post-covid,24612,19424
pre-covid,49281,63341


In [7]:
# Print the share of outcomes that were adoptions for each period.
covid_list = ["post-covid", "pre-covid"]

for covid in covid_list:
    total = adoption_contingency_table.loc[covid].sum()
    adoption_count = adoption_contingency_table.loc[covid, "Adopted"]
    # Scale to a percentage first and round last; rounding the fraction and
    # then multiplying by 100 leaks binary floating-point noise into the
    # output (e.g. 55.88999999999999). The fixed-width format spec keeps the
    # printed value at two decimals regardless.
    adoption_rate = round(adoption_count / total * 100.0, 2)
    print(f"The adoption rate {covid} is {adoption_rate:.2f}%")

The adoption rate post-covid is 55.88999999999999%
The adoption rate pre-covid is 43.76%


In [8]:
# Chi-square test of independence between period and adoption status.
# `p` is reused by the significance check in the next cell.
chi2, p, dof, expected = chi2_contingency(adoption_contingency_table)

# One line per reported quantity; the expected-frequency array starts on its
# own line.
print(
    f"chi2:    {chi2}",
    f"p-value: {p}",
    f"dof:     {dof}",
    f"expected:\n{expected}",
    sep="\n",
)

chi2:    1869.5599991713798
p-value: 0.0
dof:     1
expected:
[[20771.05636482 23264.94363518]
 [53121.94363518 59500.05636482]]


In [9]:
# set significance level = 0.05
alpha = 0.05

# The chi-square test is non-directional: a small p-value only shows that the
# adoption rate differs between the two periods. To claim an *increase*, the
# observed post-covid rate must also be higher than the pre-covid rate.
adoption_share = (
    adoption_contingency_table["Adopted"] / adoption_contingency_table.sum(axis=1)
)
rate_increased = adoption_share["post-covid"] > adoption_share["pre-covid"]

if p < alpha and rate_increased:
    print("There is a significant increase in the adoption rate post-COVID compared to pre-COVID.")
else:
    print("There is no significant increase in the adoption rate post-COVID compared to pre-COVID.")

There is a significant increase in the adoption rate post-COVID compared to pre-COVID.


In [10]:
# Grouped bar chart of outcome counts: one group per period, one bar per
# adoption status.
fig = px.histogram(
    adoption_research,
    x="before_or_after_covid",
    color="adoption_status",
    barmode="group",
    title="Adoption Cases Before and After Covid",
)

# Single layout pass. The x-axis shows the period (before/after COVID), not the
# adoption status, so it is titled accordingly; adoption status is conveyed by
# bar colour and named in the legend title.
fig.update_layout(
    title_x=0.5,
    xaxis=dict(title="Before or After COVID"),
    yaxis=dict(title="Counts"),
    legend_title_text="Adoption Status",
    height=600,
    width=1000,
    legend=dict(
        orientation="v",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
)

fig.show()