In [1]:
# Import dependencies

import pandas as pd
import numpy as np
from datetime import datetime
import time
import re
from sqlalchemy import create_engine
import psycopg2


In [2]:
# Read in the Austin Animal Center intakes CSV
file_to_load = "Austin_Animal_Center_Intakes.csv"

# Load the file into a DataFrame
Intakes_df = pd.read_csv(file_to_load)

# Preview the first rows of the DataFrame
Intakes_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A786884,*Brock,01/03/2019 04:19:00 PM,January 2019,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
1,A706918,Belle,07/05/2015 12:59:00 PM,July 2015,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
2,A724273,Runster,04/14/2016 06:43:00 PM,April 2016,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
3,A665644,,10/21/2013 07:59:00 AM,October 2013,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico
4,A857105,Johnny Ringo,05/12/2022 12:23:00 AM,May 2022,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby


In [3]:
# Show the dtype of each column
Intakes_df.dtypes

Animal ID           object
Name                object
DateTime            object
MonthYear           object
Found Location      object
Intake Type         object
Intake Condition    object
Animal Type         object
Sex upon Intake     object
Age upon Intake     object
Breed               object
Color               object
dtype: object

In [4]:
# Count the non-null values in each column
Intakes_df.count()

Animal ID           143839
Name                101654
DateTime            143839
MonthYear           143839
Found Location      143839
Intake Type         143839
Intake Condition    143839
Animal Type         143839
Sex upon Intake     143838
Age upon Intake     143839
Breed               143839
Color               143839
dtype: int64

In [5]:
# Drop the Name and MonthYear columns.
# The labels are passed via `columns=` because the positional axis form
# (`drop(labels, 1)`) was deprecated in pandas 1.3 and is rejected by pandas 2.x.
Intakes_df = Intakes_df.drop(columns=["Name", "MonthYear"])
Intakes_df.head(5)

  


Unnamed: 0,Animal ID,DateTime,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A786884,01/03/2019 04:19:00 PM,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
1,A706918,07/05/2015 12:59:00 PM,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
2,A724273,04/14/2016 06:43:00 PM,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
3,A665644,10/21/2013 07:59:00 AM,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico
4,A857105,05/12/2022 12:23:00 AM,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby


In [6]:
# Strip the leading "A" prefix from Animal ID and convert the column to integers.
# The pattern is anchored (^A) with an explicit regex=True so that only the prefix
# is removed and the result does not depend on the pandas version's `regex` default.
Intakes_df["Animal ID"] = pd.to_numeric(
    Intakes_df["Animal ID"].str.replace(r"^A", "", regex=True)
)
# Confirm the resulting dtype
Intakes_df["Animal ID"].dtype

dtype('int64')

In [7]:
# Convert the intake timestamps (e.g. "01/03/2019 04:19:00 PM") to datetime64.
# An explicit format pins month-first, 12-hour parsing instead of relying on
# format inference.
Intakes_df["DateTime"] = pd.to_datetime(
    Intakes_df["DateTime"], format="%m/%d/%Y %I:%M:%S %p"
)


In [8]:
# Break the DateTime column out into separate date, year, month, day and time columns
intake_dt = Intakes_df["DateTime"].dt
Intakes_df["Intake Date"] = intake_dt.date
Intakes_df["Intake Year"] = intake_dt.year
Intakes_df["Intake Month"] = intake_dt.month
Intakes_df["Intake Day"] = intake_dt.day
Intakes_df["Intake Time"] = intake_dt.time

# Preview the result
Intakes_df.head(5)

Unnamed: 0,Animal ID,DateTime,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Intake Date,Intake Year,Intake Month,Intake Day,Intake Time
0,786884,2019-01-03 16:19:00,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,2019-01-03,2019,1,3,16:19:00
1,706918,2015-07-05 12:59:00,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,2015-07-05,2015,7,5,12:59:00
2,724273,2016-04-14 18:43:00,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,2016-04-14,2016,4,14,18:43:00
3,665644,2013-10-21 07:59:00,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico,2013-10-21,2013,10,21,07:59:00
4,857105,2022-05-12 00:23:00,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby,2022-05-12,2022,5,12,00:23:00


In [9]:
# Determine the day of the week of each intake.
# "Intake Date" is converted to datetime64 so the .dt accessor is available.
Intakes_df["Intake Date"] = pd.to_datetime(Intakes_df["Intake Date"])
# Select the column by label rather than by position so the lookup does not
# depend on the current column order.
Date_time_series = Intakes_df["Intake Date"].dt.dayofweek
Date_time_series.head(3)

0    3
1    6
2    3
Name: Intake Date, dtype: int64

In [10]:
# Add the day-of-week values as a new column on Intakes_df.
# The values come from dt.dayofweek, which numbers days 0 = Monday ... 6 = Sunday.
Intakes_df["Intake Day of the Week"] = Date_time_series

# Drop the original DateTime column now that it has been split into separate columns
Intakes_df = Intakes_df.drop(columns=["DateTime"])


In [11]:
# Preview the DataFrame after the column changes above
Intakes_df.head()

Unnamed: 0,Animal ID,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Intake Date,Intake Year,Intake Month,Intake Day,Intake Time,Intake Day of the Week
0,786884,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,2019-01-03,2019,1,3,16:19:00,3
1,706918,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,2015-07-05,2015,7,5,12:59:00,6
2,724273,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,2016-04-14,2016,4,14,18:43:00,3
3,665644,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico,2013-10-21,2013,10,21,07:59:00,0
4,857105,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby,2022-05-12,2022,5,12,00:23:00,3


In [18]:
# Count the non-null values in each column
Intakes_df.count()

Animal ID                 143839
Found Location            143839
Intake Type               143839
Intake Condition          143839
Animal Type               143839
Sex upon Intake           143838
Age upon Intake           143839
Breed                     143839
Color                     143839
Intake Date               143839
Intake Year               143839
Intake Month              143839
Intake Day                143839
Intake Time               143839
Intake Day of the Week    143839
dtype: int64

In [23]:
# Keep only the rows whose Intake Type is not "Wildlife", then recount non-null values
not_wildlife = Intakes_df["Intake Type"].ne("Wildlife")
Intakes_df = Intakes_df.loc[not_wildlife]
Intakes_df.count()

Animal ID                 138296
Found Location            138296
Intake Type               138296
Intake Condition          138296
Animal Type               138296
Sex upon Intake           138295
Age upon Intake           138296
Breed                     138296
Color                     138296
Intake Date               138296
Intake Year               138296
Intake Month              138296
Intake Day                138296
Intake Time               138296
Intake Day of the Week    138296
dtype: int64

In [None]:
# Write the cleaned intakes DataFrame to disk as CSV.
# NOTE(review): the output path has no ".csv" extension, and to_csv writes the
# DataFrame index as an unnamed first column by default — confirm that whatever
# reads this file expects both.
Intakes_df.to_csv("Intakes_cleaned")