In [1]:
# Import dependencies

import pandas as pd
import numpy as np
from datetime import datetime
import time
import re
from sqlalchemy import create_engine
import psycopg2


In [2]:
# Location of the outcomes CSV that this notebook cleans.
file_to_load = "Austin_Animal_Center_Outcomes.csv"

# Load the CSV into the working DataFrame used by every later cell.
Outcomes_df = pd.read_csv(filepath_or_buffer=file_to_load)


In [3]:
# Show the dtype pandas assigned to each column when reading the CSV.
Outcomes_df.dtypes

Animal ID           object
Name                object
DateTime            object
MonthYear           object
Date of Birth       object
Outcome Type        object
Outcome Subtype     object
Animal Type         object
Sex upon Outcome    object
Age upon Outcome    object
Breed               object
Color               object
dtype: object

In [4]:
# Non-null count per column; a count below the largest one means missing values.
Outcomes_df.count()

Animal ID           143723
Name                101667
DateTime            143723
MonthYear           143723
Date of Birth       143723
Outcome Type        143700
Outcome Subtype      65906
Animal Type         143723
Sex upon Outcome    143722
Age upon Outcome    143686
Breed               143723
Color               143723
dtype: int64

In [5]:
# Swap every space in the column labels for an underscore.
Outcomes_df.columns = [label.replace(' ', '_') for label in Outcomes_df.columns]
Outcomes_df.count()

Animal_ID           143723
Name                101667
DateTime            143723
MonthYear           143723
Date_of_Birth       143723
Outcome_Type        143700
Outcome_Subtype      65906
Animal_Type         143723
Sex_upon_Outcome    143722
Age_upon_Outcome    143686
Breed               143723
Color               143723
dtype: int64

In [6]:
# Drop the Name and MonthYear columns.
# The labels are passed through `columns=`: the positional axis argument used
# previously (`drop([...], 1)`) is deprecated in pandas 1.3+ and rejected by
# pandas 2.0+.
Outcomes_df = Outcomes_df.drop(columns=["Name", "MonthYear"])
Outcomes_df.head(5)

  


Unnamed: 0,Animal_ID,DateTime,Date_of_Birth,Outcome_Type,Outcome_Subtype,Animal_Type,Sex_upon_Outcome,Age_upon_Outcome,Breed,Color
0,A794011,05/08/2019 06:20:00 PM,05/02/2017,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,07/18/2018 04:02:00 PM,07/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,08/16/2020 11:38:00 AM,08/16/2019,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,02/13/2016 05:59:00 PM,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,03/18/2014 11:47:00 AM,03/12/2014,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


In [7]:
# Remove the letter "A" from Animal_ID and convert the remainder to a number.
# regex=False makes the literal match explicit (the default for `regex` differs
# between pandas versions), and pd.to_numeric is applied to the Series directly
# rather than through a one-column DataFrame.apply.
Outcomes_df["Animal_ID"] = pd.to_numeric(
    Outcomes_df["Animal_ID"].str.replace("A", "", regex=False)
)
Outcomes_df["Animal_ID"].dtypes

dtype('int64')

In [8]:
# Parse the outcome timestamp and the birth date from text into datetime64.
for date_column in ("DateTime", "Date_of_Birth"):
    Outcomes_df[date_column] = pd.to_datetime(Outcomes_df[date_column])


In [9]:
# Break the outcome timestamp and the birth date into separate calendar fields.
outcome_parts = Outcomes_df["DateTime"].dt
birth_parts = Outcomes_df["Date_of_Birth"].dt

# New column name -> values; insertion order is the order the columns are added.
derived_columns = {
    "Outcome_Date": outcome_parts.date,
    "Outcome_Year": outcome_parts.year,
    "Outcome_Month": outcome_parts.month,
    "Outcome_Day": outcome_parts.day,
    "Outcome_Time": outcome_parts.time,
    "DOB_Year": birth_parts.year,
    "DOB_Month": birth_parts.month,
    "DOB_Day": birth_parts.day,
}
for column_name, column_values in derived_columns.items():
    Outcomes_df[column_name] = column_values

Outcomes_df.head()

Unnamed: 0,Animal_ID,DateTime,Date_of_Birth,Outcome_Type,Outcome_Subtype,Animal_Type,Sex_upon_Outcome,Age_upon_Outcome,Breed,Color,Outcome_Date,Outcome_Year,Outcome_Month,Outcome_Day,Outcome_Time,DOB_Year,DOB_Month,DOB_Day
0,794011,2019-05-08 18:20:00,2017-05-02,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White,2019-05-08,2019,5,8,18:20:00,2017,5,2
1,776359,2018-07-18 16:02:00,2017-07-12,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown,2018-07-18,2018,7,18,16:02:00,2017,7,12
2,821648,2020-08-16 11:38:00,2019-08-16,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray,2020-08-16,2020,8,16,11:38:00,2019,8,16
3,720371,2016-02-13 17:59:00,2015-10-08,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff,2016-02-13,2016,2,13,17:59:00,2015,10,8
4,674754,2014-03-18 11:47:00,2014-03-12,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby,2014-03-18,2014,3,18,11:47:00,2014,3,12


In [10]:
# Determine the day of the week of each outcome.
# Outcome_Date was built with .dt.date, so it holds plain date objects; convert
# it back to datetime64 so the .dt accessor is available.
Outcomes_df["Outcome_Date"] = pd.to_datetime(Outcomes_df["Outcome_Date"])
# Select the column by name instead of by position (previously .iloc[:, 10]) so
# the result does not depend on the current column order.
# dayofweek returns an integer where Monday is 0 and Sunday is 6.
Date_time_series = Outcomes_df["Outcome_Date"].dt.dayofweek
Date_time_series.head(3)

0    2
1    2
2    6
Name: Outcome_Date, dtype: int64

In [11]:
# Add the day-of-week column to Outcomes_df.
# The values come from Series.dt.dayofweek: 0 = Monday, 1 = Tuesday, ..., 6 = Sunday.
Outcomes_df["Outcome_Day_of_the_Week"] = Date_time_series

# Delete DateTime. `columns=` already names the axis, so the redundant
# `axis=1` is omitted.
Outcomes_df = Outcomes_df.drop(columns=["DateTime"])


In [12]:
# Compute the age at outcome in whole days as Outcome_Date - Date_of_Birth,
# then drop the original Age_upon_Outcome column.
Outcomes_df["Age_at_Outcome"] = (Outcomes_df["Outcome_Date"] - Outcomes_df["Date_of_Birth"]).dt.days
# The label is passed through `columns=`: the positional axis argument used
# previously (`drop([...], 1, ...)`) is deprecated in pandas 1.3+ and rejected
# by pandas 2.0+.
Outcomes_df = Outcomes_df.drop(columns=["Age_upon_Outcome"])
Outcomes_df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Animal_ID,Date_of_Birth,Outcome_Type,Outcome_Subtype,Animal_Type,Sex_upon_Outcome,Breed,Color,Outcome_Date,Outcome_Year,Outcome_Month,Outcome_Day,Outcome_Time,DOB_Year,DOB_Month,DOB_Day,Outcome_Day_of_the_Week,Age_at_Outcome
0,794011,2017-05-02,Rto-Adopt,,Cat,Neutered Male,Domestic Shorthair Mix,Brown Tabby/White,2019-05-08,2019,5,8,18:20:00,2017,5,2,2,736
1,776359,2017-07-12,Adoption,,Dog,Neutered Male,Chihuahua Shorthair Mix,White/Brown,2018-07-18,2018,7,18,16:02:00,2017,7,12,2,371
2,821648,2019-08-16,Euthanasia,,Other,Unknown,Raccoon,Gray,2020-08-16,2020,8,16,11:38:00,2019,8,16,6,366
3,720371,2015-10-08,Adoption,,Dog,Neutered Male,Anatol Shepherd/Labrador Retriever,Buff,2016-02-13,2016,2,13,17:59:00,2015,10,8,5,128
4,674754,2014-03-12,Transfer,Partner,Cat,Intact Male,Domestic Shorthair Mix,Orange Tabby,2014-03-18,2014,3,18,11:47:00,2014,3,12,1,6


In [13]:
# Non-null count per column of the frame as it stands at this point.
Outcomes_df.count()

Animal_ID                  143723
Date_of_Birth              143723
Outcome_Type               143700
Outcome_Subtype             65906
Animal_Type                143723
Sex_upon_Outcome           143722
Breed                      143723
Color                      143723
Outcome_Date               143723
Outcome_Year               143723
Outcome_Month              143723
Outcome_Day                143723
Outcome_Time               143723
DOB_Year                   143723
DOB_Month                  143723
DOB_Day                    143723
Outcome_Day_of_the_Week    143723
Age_at_Outcome             143723
dtype: int64

In [None]:
#Outcomes_df.to_csv

In [16]:
# Export the cleaned frame to CSV. index=False keeps the DataFrame index out of
# the file; without it to_csv writes the index as an extra leading column.
Outcomes_df.to_csv("Outcomes_cleaned.csv", encoding='utf-8', index=False)