In [52]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [53]:
# The CSV holds more than 10,000 rows and we work with at most 2,000,
# so draw a random sample; the fixed random_state keeps it reproducible.
df = pd.read_csv("animal_outcomes.csv").sample(n=2000, random_state=10)

In [54]:
# Preview the first five rows of the sampled frame.
df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
5480,A892213,*Anatasia,12/28/2023 01:00:00 PM,Dec 2023,09/14/2023,Adoption,Foster,Cat,Spayed Female,3 months,Domestic Shorthair,White/Black
3766,A888197,,09/05/2023 06:30:00 PM,Sep 2023,07/25/2023,Transfer,Partner,Cat,Intact Male,1 month,Domestic Shorthair,Orange Tabby
2179,A880975,,06/16/2023 05:12:00 PM,Jun 2023,04/09/2023,Adoption,,Cat,Neutered Male,2 months,Domestic Shorthair,Blue Tabby
1861,A881776,,06/02/2023 04:26:00 PM,Jun 2023,03/26/2023,Adoption,,Cat,Spayed Female,2 months,Domestic Shorthair,Calico
4895,A889273,*Ruby,11/17/2023 12:31:00 PM,Nov 2023,08/06/2023,Adoption,Foster,Cat,Spayed Female,3 months,Domestic Shorthair,Tortie


In [55]:
# Drop the columns that are not used below:
# Animal ID, DateTime, Animal Type and Date of Birth.
# Reassigning (rather than inplace=True) keeps the step explicit and chainable.
df = df.drop(columns=["Animal ID", "DateTime", "Animal Type", "Date of Birth"])
df.head()

Unnamed: 0,Name,MonthYear,Outcome Type,Outcome Subtype,Sex upon Outcome,Age upon Outcome,Breed,Color
5480,*Anatasia,Dec 2023,Adoption,Foster,Spayed Female,3 months,Domestic Shorthair,White/Black
3766,,Sep 2023,Transfer,Partner,Intact Male,1 month,Domestic Shorthair,Orange Tabby
2179,,Jun 2023,Adoption,,Neutered Male,2 months,Domestic Shorthair,Blue Tabby
1861,,Jun 2023,Adoption,,Spayed Female,2 months,Domestic Shorthair,Calico
4895,*Ruby,Nov 2023,Adoption,Foster,Spayed Female,3 months,Domestic Shorthair,Tortie


In [56]:
# Number of sampled rows per outcome type, most frequent first.
df["Outcome Type"].value_counts()

Outcome Type
Adoption           1265
Transfer            613
Return to Owner      52
Euthanasia           33
Died                 18
Rto-Adopt             8
Disposal              8
Missing               1
Name: count, dtype: int64

In [57]:
# Number of sampled rows per outcome subtype, most frequent first.
df["Outcome Subtype"].value_counts()

Outcome Subtype
Partner       473
Foster        392
Snr           140
Suffering      33
In Kennel      11
In Foster       5
Emergency       1
At Vet          1
Offsite         1
In Surgery      1
Name: count, dtype: int64

In [58]:
# Number of sampled rows per sex-upon-outcome category, most frequent first.
df["Sex upon Outcome"].value_counts()

Sex upon Outcome
Neutered Male    678
Spayed Female    660
Intact Male      292
Intact Female    261
Unknown          109
Name: count, dtype: int64

In [59]:
# Show the raw colour distribution before it is collapsed.
print(df["Color"].value_counts())

# Any colour written as "A/B" is collapsed into the single category "Mixed".
# na=False makes a missing colour evaluate to False; otherwise the NaN that
# str.contains returns is treated as truthy by np.where and the row would be
# mislabelled "Mixed". regex=False matches the "/" literally.
has_slash = df["Color"].str.contains("/", na=False, regex=False)
df["Color"] = np.where(has_slash, "Mixed", df["Color"])

Color
Brown Tabby                  313
Black                        258
Black/White                  171
Orange Tabby                 150
Brown Tabby/White            145
                            ... 
Torbie/Blue Tabby              1
Gray/Gray                      1
Orange Tabby/Orange Tabby      1
Black Smoke/White              1
Calico/Calico                  1
Name: count, Length: 89, dtype: int64


In [60]:
def toWeeks(age):
    """Convert an age string such as ``"3 months"`` into a number of weeks.

    Parameters
    ----------
    age : str, None or NaN
        Text of the form ``"<integer> <unit>"`` where the unit is
        day(s), week(s), month(s) or year(s). ``None`` and NaN are
        treated as missing.

    Returns
    -------
    int, float or None
        Weeks as an int for week/month/year units, a float for day units
        (days / 7), and ``None`` for missing input or an unrecognised unit.

    Raises
    ------
    ValueError
        If the text does not split into exactly two tokens or the first
        token is not an integer.
    AttributeError
        If ``age`` is a non-missing value without ``.split()`` (for example
        an already-converted number), so accidental re-runs fail loudly.
    """
    # Missing entries: None, or a float NaN (the only float unequal to itself).
    if age is None or (isinstance(age, float) and age != age):
        return None

    value, unit = age.split()
    value = int(value)

    if unit in ('year', 'years'):
        return value * 52
    if unit in ('month', 'months'):
        # Approximation: a month is counted as 4 weeks.
        return value * 4
    if unit in ('week', 'weeks'):
        return value
    if unit in ('day', 'days'):
        # Previously fell through to None, discarding the age of very young animals.
        return value / 7
    return None

In [61]:
# Encode string columns as numeric values for analysis.
# Each mapping gives every distinct value of a column a 1-based integer code,
# numbered in the order the values are returned by .unique().
outcomes = {category: i for i, category in enumerate(df["Outcome Type"].unique(), 1)}
sex = {category: i for i, category in enumerate(df["Sex upon Outcome"].unique(), 1)}
breeds = {category: i for i, category in enumerate(df["Breed"].unique(), 1)}
colors = {category: i for i, category in enumerate(df["Color"].unique(), 1)}
sub = {category: i for i, category in enumerate(df["Outcome Subtype"].unique(), 1)}

# Age text ("3 months") becomes a week count via toWeeks.
df["Age upon Outcome"] = df["Age upon Outcome"].apply(toWeeks)
# Name becomes a flag: 1 when a name is present, 0 when it is missing.
df["Name"] = np.where(df["Name"].notna(), 1, 0)

# Assign the replaced column back instead of calling
# df[col].replace(..., inplace=True): the inplace call acts on an intermediate
# object, which pandas 2.2 warns about and which no longer updates df once
# Copy-on-Write is enabled.
code_tables = {
    "Outcome Type": outcomes,
    "Sex upon Outcome": sex,
    "Breed": breeds,
    "Color": colors,
    "Outcome Subtype": sub,
}
for column, codes in code_tables.items():
    df[column] = df[column].replace(codes)

In [62]:
# Preview the first five rows after the columns have been encoded.
df.head()

Unnamed: 0,Name,MonthYear,Outcome Type,Outcome Subtype,Sex upon Outcome,Age upon Outcome,Breed,Color
5480,1,Dec 2023,1.0,1,1,12.0,1,1
3766,0,Sep 2023,2.0,2,2,4.0,1,2
2179,0,Jun 2023,1.0,3,3,8.0,1,3
1861,0,Jun 2023,1.0,3,1,8.0,1,4
4895,1,Nov 2023,1.0,1,1,12.0,1,5
