In [1]:
import pandas as pd
import numpy as np

In [2]:
# Importing the Austin Animal Center Outcome data from a csv into a dataframe

file_path = "./Austin_Animal_Center_Outcomes.csv"
outcome_df = pd.read_csv(file_path,low_memory=False)
outcome_df.head(5)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A794011,Chunk,5/8/19 18:20,19-May,5/2/17,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,Gizmo,7/18/18 16:02,18-Jul,7/12/17,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,,8/16/20 11:38,20-Aug,8/16/19,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,Moose,2/13/16 17:59,16-Feb,10/8/15,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,,3/18/14 11:47,14-Mar,3/12/14,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


In [3]:
# Copying the dataframe into a new dataframe so that it can be transformed with addtional data.

df2 = outcome_df.copy()
df2.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A794011,Chunk,5/8/19 18:20,19-May,5/2/17,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,Gizmo,7/18/18 16:02,18-Jul,7/12/17,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,,8/16/20 11:38,20-Aug,8/16/19,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,Moose,2/13/16 17:59,16-Feb,10/8/15,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,,3/18/14 11:47,14-Mar,3/12/14,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


In [4]:
# Splitting the Age upon intake column into two columns 0 & 1 containing the number and the unit.

d = {'months': 31, 'years':365, 'days':1,'weeks':7,'month': 31, 'year':365,'day':1,'week':7}
df3=df2['Age upon Outcome'].str.extract('(\d+)\s+(years|months|weeks|days|year|month|week|day)', expand=True)

In [5]:
df3.head(2)

Unnamed: 0,0,1
0,2,years
1,1,year


In [6]:
# Here mapping the above data so that we can get Age Upon Outcome in days


outcome_df['Age Upon Outcome(days)'] = df3[0].astype(float).mul(df3[1].map(d)).astype('Int64').astype(str)
#df2['age_upon_intake(days)'] = df3[0].astype(float).mul(df3[1].map(d)).astype('Int64')
df2['Unit'] = np.where(df3[1].isin(['years','months', 'days','weeks']), ' days', ' ' + df3[1])
outcome_df.head()


Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Age Upon Outcome(days)
0,A794011,Chunk,5/8/19 18:20,19-May,5/2/17,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,730
1,A776359,Gizmo,7/18/18 16:02,18-Jul,7/12/17,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,365
2,A821648,,8/16/20 11:38,20-Aug,8/16/19,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,365
3,A720371,Moose,2/13/16 17:59,16-Feb,10/8/15,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,124
4,A674754,,3/18/14 11:47,14-Mar,3/12/14,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,6


In [7]:
outcome_df.dtypes

Animal ID                 object
Name                      object
DateTime                  object
MonthYear                 object
Date of Birth             object
Outcome Type              object
Outcome Subtype           object
Animal Type               object
Sex upon Outcome          object
Age upon Outcome          object
Breed                     object
Color                     object
Age Upon Outcome(days)    object
dtype: object

In [8]:
outcome_df.count()

Animal ID                 143722
Name                      101667
DateTime                  143722
MonthYear                 143722
Date of Birth             143722
Outcome Type              143699
Outcome Subtype            65906
Animal Type               143722
Sex upon Outcome          143721
Age upon Outcome          143685
Breed                     143722
Color                     143722
Age Upon Outcome(days)    143722
dtype: int64

In [9]:
from datetime import datetime


outcome_df['DateTime'] = pd.to_datetime(outcome_df['DateTime'] )
outcome_df['Date of Birth']=pd.to_datetime(outcome_df['Date of Birth'] )


outcome_df["Age Upon Outcome(days)"]=(outcome_df['DateTime']-outcome_df['Date of Birth']).dt.days
outcome_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Age Upon Outcome(days)
0,A794011,Chunk,2019-05-08 18:20:00,19-May,2017-05-02,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,736
1,A776359,Gizmo,2018-07-18 16:02:00,18-Jul,2017-07-12,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,371
2,A821648,,2020-08-16 11:38:00,20-Aug,2019-08-16,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,366
3,A720371,Moose,2016-02-13 17:59:00,16-Feb,2015-10-08,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,128
4,A674754,,2014-03-18 11:47:00,14-Mar,2014-03-12,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,6


In [10]:
# Checking if the mapping to convert the age upon intake into days was successful and there are no NA 
#or not applicable values



# outcome_df[outcome_df['Age Upon Outcome(days)'] == '<NA>'].count()

# # Since "Age upon Outcome" is Null for those rows , age is days and years is not calculate.
# if (outcome_df[outcome_df['Age Upon Outcome(days)'] == '<NA>']):
#     outcome_df[outcome_df['Age Upon Outcome(days)']= DateTime - Date of Birth

# #Converting the Days into integer
# #intake_df['Age Upon Intake(days)']=intake_df['Age Upon Intake(days)'].astype(int)


In [11]:
# Dropping Name column from the dataframe.

outcome_df=outcome_df.drop('Name',axis=1)

In [12]:

# Using datetime series getting the data for Intake Month , Intake year , Intake day name , intake hour.

series= outcome_df['DateTime']

sr = pd.to_datetime(series)

outcome_df['Outcome Month'] = sr.dt.month
outcome_df['Outcome Year']=sr.dt.year
outcome_df['Outcome Weekday'] = sr.dt.day_name()
outcome_df['Outcome Hour'] = sr.dt.hour

In [13]:
# Calculating the age upon intake in Years

outcome_df['Age Upon Outcome(years)'] = round(outcome_df['Age Upon Outcome(days)']/365,2)
outcome_df.head(2)
outcome_df['Age Upon Outcome(years)'].unique()


array([ 2.02,  1.02,  1.  , ..., 13.64, 17.87, 15.67])

In [14]:
# Adding age groups bins of intake animals
age_bins = [0,2.5,5,7.5,10,12.5,15,17.5,20,22.5,25,27.5,30]
outcome_df['Age Range']= pd.cut(outcome_df['Age Upon Outcome(years)'],age_bins, include_lowest=True)
outcome_df.head(2)

Unnamed: 0,Animal ID,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Age Upon Outcome(days),Outcome Month,Outcome Year,Outcome Weekday,Outcome Hour,Age Upon Outcome(years),Age Range
0,A794011,2019-05-08 18:20:00,19-May,2017-05-02,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,736,5,2019,Wednesday,18,2.02,"(-0.001, 2.5]"
1,A776359,2018-07-18 16:02:00,18-Jul,2017-07-12,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,371,7,2018,Wednesday,16,1.02,"(-0.001, 2.5]"


In [15]:
outcome_df.dtypes

Animal ID                          object
DateTime                   datetime64[ns]
MonthYear                          object
Date of Birth              datetime64[ns]
Outcome Type                       object
Outcome Subtype                    object
Animal Type                        object
Sex upon Outcome                   object
Age upon Outcome                   object
Breed                              object
Color                              object
Age Upon Outcome(days)              int64
Outcome Month                       int64
Outcome Year                        int64
Outcome Weekday                    object
Outcome Hour                        int64
Age Upon Outcome(years)           float64
Age Range                        category
dtype: object

In [16]:
outcome_df = outcome_df.sort_values(by=['Animal ID','DateTime'], ascending = [True, True])


# Getting the Unique count of the Outcome Frequency 

outcome_df['Outcome Frequency'] = outcome_df.groupby('Animal ID')['Animal ID'].transform('count')
outcome_df['Outcome Frequency'].unique()



array([ 3,  1,  2,  5,  4,  6,  7,  9,  8, 11, 14, 12, 33])

In [17]:
# add incremental value for duplicates Animal ID's but in the order of the day/time they were recieved.

outcome_df["Order of Outcome"] =outcome_df.groupby(['Animal ID'])['Animal ID'].transform('cumcount')+1
outcome_df["Order of Outcome"].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33])

In [18]:

#Checking for a sample Animal ID = 'A462580' which came 6 times,results are sorted in ascending order of the day/time.

outcome_df[outcome_df['Animal ID']=='A462580']

Unnamed: 0,Animal ID,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Age Upon Outcome(days),Outcome Month,Outcome Year,Outcome Weekday,Outcome Hour,Age Upon Outcome(years),Age Range,Outcome Frequency,Order of Outcome
116981,A462580,2014-12-06 15:32:00,14-Dec,2004-10-01,Return to Owner,,Dog,Neutered Male,10 years,Pit Bull,Brown Brindle/White,3718,12,2014,Saturday,15,10.19,"(10.0, 12.5]",6,1
28497,A462580,2015-01-02 12:26:00,15-Jan,2004-10-01,Return to Owner,,Dog,Neutered Male,10 years,Pit Bull,Brown Brindle/White,3745,1,2015,Friday,12,10.26,"(10.0, 12.5]",6,2
33113,A462580,2015-08-04 18:09:00,15-Aug,2004-10-01,Return to Owner,,Dog,Neutered Male,10 years,Pit Bull,Brown Brindle/White,3959,8,2015,Tuesday,18,10.85,"(10.0, 12.5]",6,3
8020,A462580,2015-08-21 16:18:00,15-Aug,2004-10-01,Return to Owner,,Dog,Neutered Male,10 years,Pit Bull,Brown Brindle/White,3976,8,2015,Friday,16,10.89,"(10.0, 12.5]",6,4
48067,A462580,2015-10-02 18:25:00,15-Oct,2004-10-01,Return to Owner,,Dog,Neutered Male,11 years,Pit Bull,Brown Brindle/White,4018,10,2015,Friday,18,11.01,"(10.0, 12.5]",6,5
33930,A462580,2016-10-29 14:55:00,16-Oct,2004-10-01,Adoption,Foster,Dog,Neutered Male,12 years,Pit Bull,Brown Brindle/White,4411,10,2016,Saturday,14,12.08,"(10.0, 12.5]",6,6


In [19]:
outcome_df.head()

Unnamed: 0,Animal ID,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Age Upon Outcome(days),Outcome Month,Outcome Year,Outcome Weekday,Outcome Hour,Age Upon Outcome(years),Age Range,Outcome Frequency,Order of Outcome
124731,A006100,2014-03-08 17:10:00,14-Mar,2007-07-09,Return to Owner,,Dog,Neutered Male,6 years,Spinone Italiano Mix,Yellow/White,2434,3,2014,Saturday,17,6.67,"(5.0, 7.5]",3,1
61414,A006100,2014-12-20 16:35:00,14-Dec,2007-07-09,Return to Owner,,Dog,Neutered Male,7 years,Spinone Italiano Mix,Yellow/White,2721,12,2014,Saturday,16,7.45,"(5.0, 7.5]",3,2
109865,A006100,2017-12-07 00:00:00,17-Dec,2007-07-09,Return to Owner,,Dog,Neutered Male,10 years,Spinone Italiano Mix,Yellow/White,3804,12,2017,Thursday,0,10.42,"(10.0, 12.5]",3,3
42434,A047759,2014-04-07 15:12:00,14-Apr,2004-04-02,Transfer,Partner,Dog,Neutered Male,10 years,Dachshund,Tricolor,3657,4,2014,Monday,15,10.02,"(10.0, 12.5]",1,1
88228,A134067,2013-11-16 11:54:00,13-Nov,1997-10-16,Return to Owner,,Dog,Neutered Male,16 years,Shetland Sheepdog,Brown/White,5875,11,2013,Saturday,11,16.1,"(15.0, 17.5]",1,1


In [21]:
outcome_df.to_csv('AAC_Outcome_etl.csv',encoding='utf-8')