In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Austin Animal Center outcome records from the CSV export into a DataFrame.
# low_memory=False turns off chunked parsing so column dtypes are inferred from the whole file.

file_path = "./Austin_Animal_Center_Outcomes.csv"
outcome_df = pd.read_csv(filepath_or_buffer=file_path, low_memory=False)
outcome_df.head(n=5)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A794011,Chunk,5/8/19 18:20,19-May,5/2/17,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,Gizmo,7/18/18 16:02,18-Jul,7/12/17,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,,8/16/20 11:38,20-Aug,8/16/19,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,Moose,2/13/16 17:59,16-Feb,10/8/15,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,,3/18/14 11:47,14-Mar,3/12/14,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


In [3]:
# Take an independent (deep) copy of the loaded frame so that additional,
# derived data can be attached to it.

df2 = outcome_df.copy(deep=True)
df2.head(n=5)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A794011,Chunk,5/8/19 18:20,19-May,5/2/17,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,Gizmo,7/18/18 16:02,18-Jul,7/12/17,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,,8/16/20 11:38,20-Aug,8/16/19,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,Moose,2/13/16 17:59,16-Feb,10/8/15,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,,3/18/14 11:47,14-Mar,3/12/14,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


In [4]:
# Splitting the "Age upon Outcome" text (e.g. "2 years") into two columns, 0 & 1,
# containing the number and the unit respectively.

# Approximate number of days per unit (a month counts as 31 days, a year as 365).
# A later cell maps the extracted unit through this table to convert ages into days.
d = {'months': 31, 'years':365, 'days':1,'weeks':7,'month': 31, 'year':365,'day':1,'week':7}

# Raw string: \d and \s must reach the regex engine untouched. In a normal string
# literal they are invalid escape sequences and trigger a warning on recent Pythons.
age_pattern = r'(\d+)\s+(years|months|weeks|days|year|month|week|day)'
df3 = df2['Age upon Outcome'].str.extract(age_pattern, expand=True)

In [5]:
# Preview the extracted pieces: column 0 is the number, column 1 is the unit.
df3.head(n=5)

Unnamed: 0,0,1
0,2,years
1,1,year
2,1,year
3,4,months
4,6,days


In [6]:
# Mapping the extracted number/unit pairs so that we get Age Upon Outcome in days:
# number * days-per-unit. The result is stored as text, so rows whose age text did
# not match the pattern show up as the string '<NA>' rather than a real missing value.
age_number = df3[0].astype(float)
days_per_unit = df3[1].map(d)
outcome_df['Age Upon Outcome(days)'] = age_number.mul(days_per_unit).astype('Int64').astype(str)

# Every unit known to `d` -- singular as well as plural -- has just been converted
# to days, so all of them are labelled ' days'. (Previously only the plural forms
# were checked, leaving e.g. "1 year" -> 365 labelled ' year'.)
df2['Unit'] = np.where(df3[1].isin(list(d)), ' days', ' ' + df3[1])
outcome_df.head()


Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Age Upon Outcome(days)
0,A794011,Chunk,5/8/19 18:20,19-May,5/2/17,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,730
1,A776359,Gizmo,7/18/18 16:02,18-Jul,7/12/17,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,365
2,A821648,,8/16/20 11:38,20-Aug,8/16/19,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,365
3,A720371,Moose,2/13/16 17:59,16-Feb,10/8/15,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,124
4,A674754,,3/18/14 11:47,14-Mar,3/12/14,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,6


In [7]:
# Inspect the dtype of every column of outcome_df at this point in the notebook.
outcome_df.dtypes

Animal ID                 object
Name                      object
DateTime                  object
MonthYear                 object
Date of Birth             object
Outcome Type              object
Outcome Subtype           object
Animal Type               object
Sex upon Outcome          object
Age upon Outcome          object
Breed                     object
Color                     object
Age Upon Outcome(days)    object
dtype: object

In [8]:
from datetime import datetime


# Parse the outcome timestamp and the date of birth into real datetime columns.
for date_column in ('DateTime', 'Date of Birth'):
    outcome_df[date_column] = pd.to_datetime(outcome_df[date_column])


# Age in whole days = outcome timestamp minus date of birth.
# This overwrites the "Age Upon Outcome(days)" values assigned by the earlier
# text-based conversion cell.
elapsed = outcome_df['DateTime'] - outcome_df['Date of Birth']
outcome_df["Age Upon Outcome(days)"] = elapsed.dt.days
outcome_df.head(n=5)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Age Upon Outcome(days)
0,A794011,Chunk,2019-05-08 18:20:00,19-May,2017-05-02,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,736
1,A776359,Gizmo,2018-07-18 16:02:00,18-Jul,2017-07-12,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,371
2,A821648,,2020-08-16 11:38:00,20-Aug,2019-08-16,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,366
3,A720371,Moose,2016-02-13 17:59:00,16-Feb,2015-10-08,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,128
4,A674754,,2014-03-18 11:47:00,14-Mar,2014-03-12,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,6


In [9]:
# Checking whether the conversion of the age upon outcome into days was successful,
# i.e. that there are no NA (not applicable) values.



# outcome_df[outcome_df['Age Upon Outcome(days)'] == '<NA>'].count()

# # Since "Age upon Outcome" is null for those rows, the age in days and years is not calculated.
# if (outcome_df[outcome_df['Age Upon Outcome(days)'] == '<NA>']):
#     outcome_df[outcome_df['Age Upon Outcome(days)']= DateTime - Date of Birth

# #Converting the Days into integer
# #intake_df['Age Upon Intake(days)']=intake_df['Age Upon Intake(days)'].astype(int)


In [11]:

# Derive calendar features from the outcome timestamp:
# outcome month, outcome year, outcome weekday name and outcome hour.

# DateTime is converted by an earlier cell; running it through pd.to_datetime
# again here keeps this cell usable on its own.
sr = pd.to_datetime(outcome_df['DateTime'])

calendar_features = {
    'Outcome Month': sr.dt.month,
    'Outcome Year': sr.dt.year,
    'Outcome Weekday': sr.dt.day_name(),
    'Outcome Hour': sr.dt.hour,
}
for feature_name, feature_values in calendar_features.items():
    outcome_df[feature_name] = feature_values

In [12]:
# Express the age upon outcome in years by dividing the day count by 365
# (leap days are ignored).

age_in_days = outcome_df['Age Upon Outcome(days)']
outcome_df['Age Upon Outcome(years)'] = age_in_days.div(365)
outcome_df.head(n=5)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Age Upon Outcome(days),Outcome Month,Outcome Year,Outcome Weekday,Outcome Hour,Age Upon Outcome(years)
0,A794011,Chunk,2019-05-08 18:20:00,19-May,2017-05-02,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,736,5,2019,Wednesday,18,2.016438
1,A776359,Gizmo,2018-07-18 16:02:00,18-Jul,2017-07-12,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,371,7,2018,Wednesday,16,1.016438
2,A821648,,2020-08-16 11:38:00,20-Aug,2019-08-16,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,366,8,2020,Sunday,11,1.00274
3,A720371,Moose,2016-02-13 17:59:00,16-Feb,2015-10-08,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,128,2,2016,Saturday,17,0.350685
4,A674754,,2014-03-18 11:47:00,14-Mar,2014-03-12,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,6,3,2014,Tuesday,11,0.016438


In [13]:
# Outcome count: for every row, the number of outcome records that share its Animal ID.

ids_by_animal = outcome_df.groupby('Animal ID')['Animal ID']
outcome_df['Outcome Count'] = ids_by_animal.transform('count')


# Spot check on one animal with many outcome records: count its rows per column.
is_spot_check_animal = outcome_df['Animal ID'].eq('A721033')
outcome_df.loc[is_spot_check_animal].count()


Animal ID                  33
Name                       33
DateTime                   33
MonthYear                  33
Date of Birth              33
Outcome Type               33
Outcome Subtype             0
Animal Type                33
Sex upon Outcome           33
Age upon Outcome           33
Breed                      33
Color                      33
Age Upon Outcome(days)     33
Outcome Month              33
Outcome Year               33
Outcome Weekday            33
Outcome Hour               33
Age Upon Outcome(years)    33
Outcome Count              33
dtype: int64

In [16]:
# Final look at the transformed frame before it is exported.
outcome_df.head(n=5)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Age Upon Outcome(days),Outcome Month,Outcome Year,Outcome Weekday,Outcome Hour,Age Upon Outcome(years),Outcome Count
0,A794011,Chunk,2019-05-08 18:20:00,19-May,2017-05-02,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,736,5,2019,Wednesday,18,2.016438,1
1,A776359,Gizmo,2018-07-18 16:02:00,18-Jul,2017-07-12,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,371,7,2018,Wednesday,16,1.016438,1
2,A821648,,2020-08-16 11:38:00,20-Aug,2019-08-16,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,366,8,2020,Sunday,11,1.00274,1
3,A720371,Moose,2016-02-13 17:59:00,16-Feb,2015-10-08,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,128,2,2016,Saturday,17,0.350685,2
4,A674754,,2014-03-18 11:47:00,14-Mar,2014-03-12,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,6,3,2014,Tuesday,11,0.016438,1


In [17]:
# Write the transformed outcome data to CSV.
# NOTE(review): with to_csv's defaults the row index is written as an extra,
# unnamed first column -- pass index=False if downstream readers do not expect it.
etl_output_path = 'AAC_Outcome_etl.csv'
outcome_df.to_csv(etl_output_path)