In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the raw Austin Animal Center intake records from the local CSV export.
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, which avoids mixed-dtype inference within a column.
file_path = "./Austin_Animal_Center_Intakes.csv"
intake_df = pd.read_csv(filepath_or_buffer=file_path, low_memory=False)

# Preview the first five rows.
intake_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A786884,*Brock,01/03/2019 04:19:00 PM,January 2019,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
1,A706918,Belle,07/05/2015 12:59:00 PM,July 2015,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
2,A724273,Runster,04/14/2016 06:43:00 PM,April 2016,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
3,A665644,,10/21/2013 07:59:00 AM,October 2013,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico
4,A857105,Johnny Ringo,05/12/2022 12:23:00 AM,May 2022,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby


In [4]:
# Take an independent (deep) copy of the intake frame so that it can be
# transformed with additional data without touching intake_df.
df2 = intake_df.copy(deep=True)
df2.head(5)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A786884,*Brock,01/03/2019 04:19:00 PM,January 2019,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
1,A706918,Belle,07/05/2015 12:59:00 PM,July 2015,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
2,A724273,Runster,04/14/2016 06:43:00 PM,April 2016,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
3,A665644,,10/21/2013 07:59:00 AM,October 2013,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico
4,A857105,Johnny Ringo,05/12/2022 12:23:00 AM,May 2022,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby


In [5]:
# Split "Age upon Intake" (e.g. "2 years", "11 months") into two columns:
#   df3[0] -> the numeric count (still text at this point)
#   df3[1] -> the unit word, singular or plural
# Rows that do not match the pattern get NaN in both columns.

# Number of days represented by one of each unit. Months are counted as 31 days
# and years as 365 days, so the converted ages are approximations.
d = {
    'years': 365, 'year': 365,
    'months': 31, 'month': 31,
    'weeks': 7, 'week': 7,
    'days': 1, 'day': 1,
}

# Raw string: "\d" and "\s" are regex escapes, not Python string escapes; in a
# normal string literal they trigger an invalid-escape-sequence warning.
# Plural alternatives are listed first so "years" is not cut short to "year".
# NOTE(review): the pattern is unanchored and has no sign, so a value such as
# "-1 years" would be read as 1 year -- confirm whether the data contains
# negative ages.
age_pattern = r'(\d+)\s+(years|months|weeks|days|year|month|week|day)'
df3 = df2['Age upon Intake'].str.extract(age_pattern, expand=True)

In [6]:
# Peek at the first five extracted (count, unit) pairs.
df3.head(n=5)

Unnamed: 0,0,1
0,2,years
1,8,years
2,11,months
3,4,weeks
4,2,years


In [7]:
# Convert each parsed age to days: numeric count x days-per-unit (looked up in d).
# The nullable Int64 dtype keeps unparsed rows as <NA>; casting to str then
# stores those rows as the literal text '<NA>'.
age_count = df3[0].astype(float)
days_per_unit = df3[1].map(d)
age_in_days = age_count.mul(days_per_unit).astype('Int64')
intake_df['Age Upon Intake(days)'] = age_in_days.astype(str)

# Label column on the working copy: ' days' where the unit was one of the
# plural forms, otherwise a space followed by the original unit word.
plural_units = ['years', 'months', 'days', 'weeks']
has_plural_unit = df3[1].isin(plural_units)
df2['Unit'] = np.where(has_plural_unit, ' days', ' ' + df3[1])

intake_df.head()


Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Age Upon Intake(days)
0,A786884,*Brock,01/03/2019 04:19:00 PM,January 2019,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,730
1,A706918,Belle,07/05/2015 12:59:00 PM,July 2015,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,2920
2,A724273,Runster,04/14/2016 06:43:00 PM,April 2016,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,341
3,A665644,,10/21/2013 07:59:00 AM,October 2013,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico,28
4,A857105,Johnny Ringo,05/12/2022 12:23:00 AM,May 2022,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby,730


In [8]:
# Column dtypes at this point: 'Age Upon Intake(days)' is still object here
# because the previous step stored it as strings.
intake_df.dtypes

Animal ID                object
Name                     object
DateTime                 object
MonthYear                object
Found Location           object
Intake Type              object
Intake Condition         object
Animal Type              object
Sex upon Intake          object
Age upon Intake          object
Breed                    object
Color                    object
Age Upon Intake(days)    object
dtype: object

In [9]:
# Verify that every age was converted to days before casting the column to int.
# Rows whose age could not be parsed hold the text '<NA>'. The check is an
# explicit guard (rather than a bare filter expression, which a notebook only
# displays when it is the last line of the cell) so that bad rows stop the run
# with a clear message instead of an opaque int() parsing error.
age_days_col = 'Age Upon Intake(days)'
unparsed_rows = intake_df[intake_df[age_days_col] == '<NA>']
if not unparsed_rows.empty:
    raise ValueError(
        f"{len(unparsed_rows)} row(s) have an 'Age upon Intake' value that could "
        "not be converted to days; fix or drop them before casting to int."
    )

# No '<NA>' markers remain, so converting the day counts to integers is safe.
intake_df[age_days_col] = intake_df[age_days_col].astype(int)


In [10]:
# Express the intake age in years, using 365 days per year.
age_days = intake_df['Age Upon Intake(days)']
intake_df['Age Upon Intake(years)'] = age_days.div(365)
intake_df.head(5)

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Age Upon Intake(days),Age Upon Intake(years)
0,A786884,*Brock,01/03/2019 04:19:00 PM,January 2019,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,730,2.0
1,A706918,Belle,07/05/2015 12:59:00 PM,July 2015,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,2920,8.0
2,A724273,Runster,04/14/2016 06:43:00 PM,April 2016,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,341,0.934247
3,A665644,,10/21/2013 07:59:00 AM,October 2013,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico,28,0.076712
4,A857105,Johnny Ringo,05/12/2022 12:23:00 AM,May 2022,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby,730,2.0


In [11]:
# Inspect the column dtypes of the intake frame again; the two derived age
# columns should now report numeric dtypes rather than object.
intake_df.dtypes

Animal ID                  object
Name                       object
DateTime                   object
MonthYear                  object
Found Location             object
Intake Type                object
Intake Condition           object
Animal Type                object
Sex upon Intake            object
Age upon Intake            object
Breed                      object
Color                      object
Age Upon Intake(days)       int64
Age Upon Intake(years)    float64
dtype: object

In [12]:
# Derive calendar features (month, year, weekday name, hour) from the intake
# timestamp.
# The DateTime column holds text such as "01/03/2019 04:19:00 PM". Passing the
# format explicitly avoids per-value format guessing: parsing is faster, the
# month/day order is unambiguous, and a value in any other layout raises
# instead of being silently guessed.
INTAKE_DATETIME_FORMAT = '%m/%d/%Y %I:%M:%S %p'  # 12-hour clock with AM/PM

sr = pd.to_datetime(intake_df['DateTime'], format=INTAKE_DATETIME_FORMAT)

intake_df['Intake Month'] = sr.dt.month          # 1-12
intake_df['Intake Year'] = sr.dt.year
intake_df['Intake Weekday'] = sr.dt.day_name()   # e.g. "Thursday"
intake_df['Intake Hour'] = sr.dt.hour            # 0-23


In [13]:
# Preview the first five rows with the new intake date columns.
intake_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Age Upon Intake(days),Age Upon Intake(years),Intake Month,Intake Year,Intake Weekday,Intake Hour
0,A786884,*Brock,01/03/2019 04:19:00 PM,January 2019,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,730,2.0,1,2019,Thursday,16
1,A706918,Belle,07/05/2015 12:59:00 PM,July 2015,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,2920,8.0,7,2015,Sunday,12
2,A724273,Runster,04/14/2016 06:43:00 PM,April 2016,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,341,0.934247,4,2016,Thursday,18
3,A665644,,10/21/2013 07:59:00 AM,October 2013,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico,28,0.076712,10,2013,Monday,7
4,A857105,Johnny Ringo,05/12/2022 12:23:00 AM,May 2022,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby,730,2.0,5,2022,Thursday,0


In [15]:
# Write the transformed intake frame to disk.
# to_csv also writes the row index as an unnamed first column by default.
# NOTE(review): pass index=False if downstream readers do not expect that
# extra column -- confirm against whatever consumes this file.
output_path = 'AAC_Intake_etl.csv'
intake_df.to_csv(output_path)