In [1]:
# Import dependencies

import pandas as pd
import numpy as np
from datetime import datetime
import time
import re
from sqlalchemy import create_engine
import psycopg2


In [2]:
# Read in Outake CSV
file_to_load = "Austin_Animal_Center_Outcomes.csv"

# Create DF
Outcomes_df = pd.read_csv(file_to_load)


In [3]:
Outcomes_df.dtypes

Animal ID           object
Name                object
DateTime            object
MonthYear           object
Date of Birth       object
Outcome Type        object
Outcome Subtype     object
Animal Type         object
Sex upon Outcome    object
Age upon Outcome    object
Breed               object
Color               object
dtype: object

In [4]:
Outcomes_df.count()

Animal ID           143723
Name                101667
DateTime            143723
MonthYear           143723
Date of Birth       143723
Outcome Type        143700
Outcome Subtype      65906
Animal Type         143723
Sex upon Outcome    143722
Age upon Outcome    143686
Breed               143723
Color               143723
dtype: int64

In [5]:
# Drop name and monthyear columns
Outcomes_df = Outcomes_df.drop(["Name", "MonthYear"], 1)
Outcomes_df.head(5)

  


Unnamed: 0,Animal ID,DateTime,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A794011,05/08/2019 06:20:00 PM,05/02/2017,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,07/18/2018 04:02:00 PM,07/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,08/16/2020 11:38:00 AM,08/16/2019,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,02/13/2016 05:59:00 PM,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,03/18/2014 11:47:00 AM,03/12/2014,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


In [6]:
#Checking for non-A starts to Animal ID
Outcomes_df["Animal ID"].unique()

array(['A794011', 'A776359', 'A821648', ..., 'A842965', 'A802462',
       'A864881'], dtype=object)

In [7]:
#Remove A from Animal ID and convert to INT
Outcomes_df["Animal ID"] = Outcomes_df["Animal ID"].str.replace("A", "")
Outcomes_df["Animal ID"] = Outcomes_df[["Animal ID"]].apply(pd.to_numeric)
Outcomes_df["Animal ID"].dtypes

dtype('int64')

In [8]:
# Convert date/times to datetime
Outcomes_df["DateTime"] = pd.to_datetime(Outcomes_df["DateTime"])
Outcomes_df["Date of Birth"] = pd.to_datetime(Outcomes_df["Date of Birth"])

Outcomes_df.dtypes

Animal ID                    int64
DateTime            datetime64[ns]
Date of Birth       datetime64[ns]
Outcome Type                object
Outcome Subtype             object
Animal Type                 object
Sex upon Outcome            object
Age upon Outcome            object
Breed                       object
Color                       object
dtype: object

In [18]:
# Split Datetime column 
Outcomes_df["Outcome Date"] = Outcomes_df["DateTime"].dt.date
Outcomes_df["Outcome Year"] = Outcomes_df["DateTime"].dt.year
Outcomes_df["Outcome Month"] = Outcomes_df["DateTime"].dt.month
Outcomes_df["Outcome Day"] = Outcomes_df["DateTime"].dt.day
Outcomes_df["Outcome Time"] = Outcomes_df["DateTime"].dt.time
Outcomes_df["DOB Year"] = Outcomes_df["Date of Birth"].dt.year
Outcomes_df["DOB Month"]= Outcomes_df["Date of Birth"].dt.month
Outcomes_df["DOB Day"]= Outcomes_df["Date of Birth"].dt.day

Outcomes_df.head()

Unnamed: 0,Animal ID,DateTime,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Outcome Year,Outcome Month,Outcome Day,Outcome Time,Outcome Date,Outcome Day of the Week,DOB Year,DOB Month,DOB Day
0,794011,2019-05-08 18:20:00,2017-05-02,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,2019,5,8,18:20:00,2019-05-08,2,2017,5,2
1,776359,2018-07-18 16:02:00,2017-07-12,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,2018,7,18,16:02:00,2018-07-18,2,2017,7,12
2,821648,2020-08-16 11:38:00,2019-08-16,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,2020,8,16,11:38:00,2020-08-16,6,2019,8,16
3,720371,2016-02-13 17:59:00,2015-10-08,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,2016,2,13,17:59:00,2016-02-13,5,2015,10,8
4,674754,2014-03-18 11:47:00,2014-03-12,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,2014,3,18,11:47:00,2014-03-18,1,2014,3,12


In [19]:
# Determine Day of the Week of Adoption
# Convert DateTime into Series so that can extrapolate the day of the week
Outcomes_df["Outcome Date"] = pd.to_datetime(Outcomes_df["Outcome Date"])
Date_time_series = Outcomes_df.iloc[:,14]
Date_time_series = Date_time_series.dt.dayofweek
Date_time_series.head(3)

0    2
1    2
2    6
Name: Outcome Date, dtype: int64

In [20]:
Outcomes_df["Outcome Day of the Week"] = Date_time_series
Outcomes_df.head()

Unnamed: 0,Animal ID,DateTime,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Outcome Year,Outcome Month,Outcome Day,Outcome Time,Outcome Date,Outcome Day of the Week,DOB Year,DOB Month,DOB Day
0,794011,2019-05-08 18:20:00,2017-05-02,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,2019,5,8,18:20:00,2019-05-08,2,2017,5,2
1,776359,2018-07-18 16:02:00,2017-07-12,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,2018,7,18,16:02:00,2018-07-18,2,2017,7,12
2,821648,2020-08-16 11:38:00,2019-08-16,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,2020,8,16,11:38:00,2020-08-16,6,2019,8,16
3,720371,2016-02-13 17:59:00,2015-10-08,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,2016,2,13,17:59:00,2016-02-13,5,2015,10,8
4,674754,2014-03-18 11:47:00,2014-03-12,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,2014,3,18,11:47:00,2014-03-18,1,2014,3,12


In [22]:
# Drop DateTime column
Outcomes_df.drop(["DateTime"], 1)
Outcomes_df.head(3)

  


Unnamed: 0,Animal ID,DateTime,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Outcome Year,Outcome Month,Outcome Day,Outcome Time,Outcome Date,Outcome Day of the Week,DOB Year,DOB Month,DOB Day
0,794011,2019-05-08 18:20:00,2017-05-02,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,2019,5,8,18:20:00,2019-05-08,2,2017,5,2
1,776359,2018-07-18 16:02:00,2017-07-12,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,2018,7,18,16:02:00,2018-07-18,2,2017,7,12
2,821648,2020-08-16 11:38:00,2019-08-16,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,2020,8,16,11:38:00,2020-08-16,6,2019,8,16


In [28]:
# Convert Age upon Outcome to days and delete column
Outcomes_df["Age at Outcome"] = (Outcomes_df["Outcome Date"] - Outcomes_df["Date of Birth"]).dt.days

Outcomes_df.head()

Unnamed: 0,Animal ID,DateTime,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color,Outcome Year,Outcome Month,Outcome Day,Outcome Time,Outcome Date,Outcome Day of the Week,DOB Year,DOB Month,DOB Day,Age at Outcome
0,794011,2019-05-08 18:20:00,2017-05-02,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,2019,5,8,18:20:00,2019-05-08,2,2017,5,2,736
1,776359,2018-07-18 16:02:00,2017-07-12,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,2018,7,18,16:02:00,2018-07-18,2,2017,7,12,371
2,821648,2020-08-16 11:38:00,2019-08-16,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,2020,8,16,11:38:00,2020-08-16,6,2019,8,16,366
3,720371,2016-02-13 17:59:00,2015-10-08,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,2016,2,13,17:59:00,2016-02-13,5,2015,10,8,128
4,674754,2014-03-18 11:47:00,2014-03-12,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,2014,3,18,11:47:00,2014-03-18,1,2014,3,12,6
