In [1]:
# I am interested in figuring out what factors might impact a cats's chances of getting adopted (vs other outcomes)
# I'm working from a dataset of intakes and outcomes from a shelter in Texas (Austin Animal Center)
# I have previously cleaned this data, so I don't need to worry about duplicates, nulls, etc. 

In [23]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
#import seaborn as sns
#sns.set()
from ydata_profiling import ProfileReport

In [3]:
# Import data
data = pd.read_csv('Intake and Outcome Data - Austin Animal Center.csv')
data = data.iloc[:,1:]
data.head()

Unnamed: 0,Animal ID,Animal Type,Date of Birth,Sex,Breed,Color,Intake Type,Intake Condition,Intake Age,Intake Reproductive Status,Intake DateTime,Outcome DateTime,Outcome Type,Outcome Subtype,Outcome Age,Outcome Reproductive Status
0,A521520,Dog,09/07/2006,Female,Border Terrier/Border Collie,White/Tan,Stray,Normal,2555,Altered,2013-10-01 07:51:00,2013-10-01 15:39:00,Return to Owner,Not Specified,2555,Altered
1,A664235,Cat,09/24/2013,Unknown,Domestic Shorthair Mix,Orange/White,Stray,Normal,7,Unknown,2013-10-01 08:33:00,2013-10-01 10:39:00,Transfer,Partner,7,Unknown
2,A664237,Cat,09/24/2013,Unknown,Domestic Shorthair Mix,Orange/White,Stray,Normal,7,Unknown,2013-10-01 08:33:00,2013-10-01 10:44:00,Transfer,Partner,7,Unknown
3,A664236,Cat,09/24/2013,Unknown,Domestic Shorthair Mix,Orange/White,Stray,Normal,7,Unknown,2013-10-01 08:33:00,2013-10-01 10:44:00,Transfer,Partner,7,Unknown
4,A664233,Dog,09/30/2010,Female,Pit Bull Mix,Blue/White,Stray,Injured,1095,Intact,2013-10-01 08:53:00,2013-10-01 15:33:00,Euthanasia,Suffering,1095,Intact


In [4]:
# Converting strings to numerical values
data['Intake DateTime'] = pd.to_datetime(data['Intake DateTime'], format = '%Y-%m-%d %H:%M:%S')
data['Outcome DateTime'] = pd.to_datetime(data['Outcome DateTime'], format = '%Y-%m-%d %H:%M:%S')
data['Date of Birth'] = pd.to_datetime(data['Date of Birth'], format = '%m/%d/%Y')

In [5]:
# I expect that the factors that impact a dog's chances of adoption might differ than those of a cat, livestock, or exotic animal.
# I'm focusing on cats in this exploration, so I don't need to continue with all the other species' data. 

#dogs = data.loc[data['Animal Type'] == 'Dog']
#bird = data.loc[data['Animal Type'] == 'Bird']
#livestock = data.loc[data['Animal Type'] == 'Livestock'] #this dataset is pretty small - only about 30 animals, so barely enough data to interpret!
#exotics = data.loc[data['Animal Type'] == 'Other']

cats = data.loc[data['Animal Type'] == 'Cat']
cats['Outcome Type'].unique()

array(['Transfer', 'Euthanasia', 'Adoption', 'Died', 'Return to Owner',
       'Missing', 'Disposal', 'Rto-Adopt', 'Relocate', 'Lost'],
      dtype=object)

In [6]:
# Pet's with presenting as Euthanasia Request will never be adopted out, so these won't answer my question in a meaningful way. 
# So I will get rid of that intake type
# Same for Lost and Missing as outcome types; these aren't intentional outcomes, so they won't answer my question meaningfully (and are hopefully infrequent)

cats = cats.loc[(cats['Intake Type'] != 'Euthanasia Request') &
                ((cats['Outcome Type'] != 'Lost') &
                (cats['Outcome Type'] != 'Missing') )]


In [7]:
# Function to calculate the Duration of Stay in days
def calculate_stay(row):
    timedelta = row['Outcome DateTime'] - row['Intake DateTime']
    return timedelta.days

In [8]:
# I'd also like to track the length of stay in the shelter to see if the variables associated with adoption outcome also change how long it takes a pet to be adopted. 

cats['Duration of Stay'] = cats.apply(calculate_stay, axis = 1)
cats.head()

Unnamed: 0,Animal ID,Animal Type,Date of Birth,Sex,Breed,Color,Intake Type,Intake Condition,Intake Age,Intake Reproductive Status,Intake DateTime,Outcome DateTime,Outcome Type,Outcome Subtype,Outcome Age,Outcome Reproductive Status,Duration of Stay
1,A664235,Cat,2013-09-24,Unknown,Domestic Shorthair Mix,Orange/White,Stray,Normal,7,Unknown,2013-10-01 08:33:00,2013-10-01 10:39:00,Transfer,Partner,7,Unknown,0
2,A664237,Cat,2013-09-24,Unknown,Domestic Shorthair Mix,Orange/White,Stray,Normal,7,Unknown,2013-10-01 08:33:00,2013-10-01 10:44:00,Transfer,Partner,7,Unknown,0
3,A664236,Cat,2013-09-24,Unknown,Domestic Shorthair Mix,Orange/White,Stray,Normal,7,Unknown,2013-10-01 08:33:00,2013-10-01 10:44:00,Transfer,Partner,7,Unknown,0
5,A664238,Cat,2013-06-01,Unknown,American Shorthair Mix,Black/White,Stray,Normal,120,Unknown,2013-10-01 09:33:00,2013-10-01 16:47:00,Euthanasia,Medical,120,Unknown,0
7,A664256,Cat,1996-10-01,Male,Domestic Shorthair Mix,Brown Tabby/White,Owner Surrender,Normal,6205,Altered,2013-10-01 10:59:00,2013-10-10 13:17:00,Transfer,Partner,6205,Altered,9


In [9]:
# The Animal Type is now a constant. So I can drop it. 

cats = cats.drop(columns = ['Animal Type'])
cats.head()

Unnamed: 0,Animal ID,Date of Birth,Sex,Breed,Color,Intake Type,Intake Condition,Intake Age,Intake Reproductive Status,Intake DateTime,Outcome DateTime,Outcome Type,Outcome Subtype,Outcome Age,Outcome Reproductive Status,Duration of Stay
1,A664235,2013-09-24,Unknown,Domestic Shorthair Mix,Orange/White,Stray,Normal,7,Unknown,2013-10-01 08:33:00,2013-10-01 10:39:00,Transfer,Partner,7,Unknown,0
2,A664237,2013-09-24,Unknown,Domestic Shorthair Mix,Orange/White,Stray,Normal,7,Unknown,2013-10-01 08:33:00,2013-10-01 10:44:00,Transfer,Partner,7,Unknown,0
3,A664236,2013-09-24,Unknown,Domestic Shorthair Mix,Orange/White,Stray,Normal,7,Unknown,2013-10-01 08:33:00,2013-10-01 10:44:00,Transfer,Partner,7,Unknown,0
5,A664238,2013-06-01,Unknown,American Shorthair Mix,Black/White,Stray,Normal,120,Unknown,2013-10-01 09:33:00,2013-10-01 16:47:00,Euthanasia,Medical,120,Unknown,0
7,A664256,1996-10-01,Male,Domestic Shorthair Mix,Brown Tabby/White,Owner Surrender,Normal,6205,Altered,2013-10-01 10:59:00,2013-10-10 13:17:00,Transfer,Partner,6205,Altered,9


In [43]:
def consolidate (row, dict, column):
    key = row[column]
    new_value = dict.get(key, key)
    return new_value

In [44]:
# There are a few Outcome Types that seem like they can be bundled together: relocate and transfer both suggest going to another shelter elsewhere; died and disposal are likely similar enough, and Rto-Adopt and Return to Owner can 
# be consolidated (this is a judgement call, but given that the pet is returning to the owner in both cases, I'm choosing to prioritize the Rto part of "Rto-Adopt").

outcome_consolidation = {
        'Disposal': 'Died',
        'Rto-Adopt': 'Return to Owner',
        'Relocate': 'Transfer'}

cats['Outcome Type'] = cats.apply(consolidate, args= (outcome_consolidation, 'Outcome Type'), axis = 1)
# cats_to_change = (cats['Outcome Type'] == 'Disposal') | (cats['Outcome Type'] == 'Rto-Adopt') | (cats['Outcome Type'] == 'Relocate')
# cats.loc[cats_to_change, 'Outcome Type'] = cats.loc[cats_to_change, 'Outcome Type'].map(outcome_consolidation).fillna("unrecognized")

In [51]:
# The intake conditions can also be consolidated. 
# All neonates are nursing juveniles; these animals will need to stay in the shelter system until at least 8wks, so they may have a longer duration of stay
# Pregnant animals and nursing adult will stay until their kittens are weaned, so they may have a longer duration of stay.
# I'm consolidating Med Attn and Medical into the "sick" condition bc these are likely mild-moderate sicknesses. 
# I'm making a 'severe illness' category that will encompass panleuk, agonal, neurological, and med urgent as these codes suggest more serious levels of sickness
# Lastly, I'm consolidating "unknown", "other", and "space" into other because these are vague and infrequent.

intake_conditions = {
    'Neonatal': 'Nursing Juvenile',
    'Pregnant': 'Pregnant/Lactating',
    'Nursing Adult' : 'Pregnant/Lactating',
    'Medical' : 'Sick',
    'Med Attn' : 'Sick',
    'Unknown' : 'Other',
    'Space' : 'Other', 
    'Panleuk': 'Severe Illness', 
    'Agonal' : 'Severe Illness',
    'Neurologic' : 'Severe Illness', 
    'Med Urgent' : 'Severe Illness'
}

cats['Intake Condition'] = cats.apply(consolidate, args= (intake_conditions, 'Intake Condition'), axis = 1)

In [53]:
# Exploring data with ydata_profiling
cats_profile = ProfileReport(cats, title="Profiling Report")
cats_profile

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [28]:
# examining cats that have a duration of stay in the 95th+ percentile

long_stay = cats.loc[cats['Duration of Stay'] > 113]
long_stay_profile = ProfileReport(long_stay, title="Profiling Report")
long_stay_profile

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]



In [27]:
plt.figure(figsize=(8, 8))  # Optional: Adjust the figure size as needed
plt.pie(
    long_stay['Outcome Type'].value_counts(normalize=True),
    labels=long_stay['Outcome Type'].unique(),
    startangle=90,
    colors=sns.color_palette('pastel')[0:10]  # Ensure there are enough colors for all categories
)

# Equal aspect ratio ensures that pie is drawn as a circle.
plt.axis('equal')

(-1.099999969147701, 1.099999916536695, -1.0999999119490775, 1.099999995807099)

In [15]:
fig, axs = plt.subplots(1, figsize=(30, 10))
axs[0].pie(cats['Outcome Type'].value_counts(normalize=True), colors = sns.color_palette('pastel')[0:10], labels=cats['Outcome Type'].unique(), autopct='%1.1f%%')
axs[0].set_title('Cat Outcome Types')


#sns.displot(livestock['Outcome Type'], kde=True)

 #sharey -> share 'Price' as y
#ax1.scatter(livestock['Year'],livestock['Price'])
#ax1.set_title('Price and Year')
#ax2.scatter(data_cleaned['EngineV'],data_cleaned['Price'])
#ax2.set_title('Price and EngineV')
#ax3.scatter(data_cleaned['Mileage'],data_cleaned['Price'])
#ax3.set_title('Price and Mileage')
#plt.show()


#plt.xlabel('Intake Condition',size=18)
#plt.ylabel('Outcome Type',size=18)

#axs[0].bar(names, values)
#axs[1].scatter(names, values)
#axs[2].plot(names, values)
#fig.suptitle('Categorical Plotting')
#plt.show()




TypeError: 'Axes' object is not subscriptable