In [2]:
import numpy as np
import pandas as pd
import requests
from data_cleaning import *

## **Get data using API**

In [3]:
# API endpoint URLs. The API returns only 1000 rows by default, so an explicit
# $limit is passed to fetch everything. The limit lives in one constant because
# both URLs must use the same value and it has to stay above each dataset's size.
ROW_LIMIT = 160000
urls = {
    "intake": f"https://data.austintexas.gov/resource/wter-evkm.csv?$limit={ROW_LIMIT}",
    "outcome": f"https://data.austintexas.gov/resource/9t4d-g238.csv?$limit={ROW_LIMIT}",
}
intake, outcome = get_data(urls)

# A frame that fills the limit was almost certainly cut off by the API; fail
# here instead of silently cleaning an incomplete dataset.
for label, frame in (("intake", intake), ("outcome", outcome)):
    assert frame.shape[0] < ROW_LIMIT, (
        f"{label} returned {frame.shape[0]} rows, which reaches ROW_LIMIT={ROW_LIMIT}; "
        "raise ROW_LIMIT and re-run"
    )

Successfully get INTAKE data(157561 rows)...
Successfully get OUTCOME data(157481 rows)...


## Check number of rows, columns, and duplicated rows

In [4]:
# Report the size of each raw frame and how many fully identical rows it holds.
for label, frame in (("INTAKE", intake), ("OUTCOME", outcome)):
    n_rows, n_cols = frame.shape
    n_dupes = frame.duplicated().sum()
    print(f"{label} data: {n_rows} rows | {n_cols} columns | {n_dupes} duplicated rows")

INTAKE data: 157561 rows | 12 columns | 30 duplicated rows
OUTCOME data: 157481 rows | 12 columns | 23 duplicated rows


## Drop duplicates

In [5]:
# Keep only the first occurrence of every fully identical row in each frame.
intake = intake.drop_duplicates()
outcome = outcome.drop_duplicates()

## Check number of rows, columns, and duplicated rows again

In [6]:
# Same size report as before the de-duplication, to confirm the duplicate count is now 0.
for label, frame in {"INTAKE": intake, "OUTCOME": outcome}.items():
    n_rows, n_cols = frame.shape
    n_dupes = frame.duplicated().sum()
    print(f"{label} data: {n_rows} rows | {n_cols} columns | {n_dupes} duplicated rows")

INTAKE data: 157531 rows | 12 columns | 0 duplicated rows
OUTCOME data: 157458 rows | 12 columns | 0 duplicated rows


## Remove `datetime2` from `intake` data and `monthyear` from `outcome` data

In [7]:
# Remove `datetime2` from intake and `monthyear` from outcome.
# The result is rebound (no inplace=True) and missing labels are ignored so the
# cell can be re-run: an in-place drop raises KeyError on the second run because
# the column is already gone.
intake = intake.drop(columns="datetime2", errors="ignore")
outcome = outcome.drop(columns="monthyear", errors="ignore")

In [8]:
# Display the intake frame after the column drop (pandas renders only the first and last rows).
intake

Unnamed: 0,animal_id,name,datetime,found_location,intake_type,intake_condition,animal_type,sex_upon_intake,age_upon_intake,breed,color
0,A786884,*Brock,2019-01-03T16:19:00.000,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
1,A706918,Belle,2015-07-05T12:59:00.000,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
2,A724273,Runster,2016-04-14T18:43:00.000,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
3,A857105,Johnny Ringo,2022-05-12T00:23:00.000,4404 Sarasota Drive in Austin (TX),Public Assist,Normal,Cat,Neutered Male,2 years,Domestic Shorthair,Orange Tabby
4,A682524,Rio,2014-06-29T10:38:00.000,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray
...,...,...,...,...,...,...,...,...,...,...,...
157556,A775309,Snowball,2018-06-27T18:16:00.000,Austin (TX),Owner Surrender,Normal,Cat,Spayed Female,2 years,Turkish Angora,White
157557,A760677,,2017-10-22T13:58:00.000,Thomaswood Lane And Lenape Trail in Austin (TX),Stray,Normal,Cat,Intact Male,2 years,Domestic Shorthair Mix,Blue/White
157558,A811255,,2019-12-31T08:10:00.000,12034 Research Blvd in Austin (TX),Stray,Injured,Cat,Intact Female,5 months,Domestic Shorthair,Black
157559,A763156,,2017-12-04T11:42:00.000,1009 West Dittmar Road in Austin (TX),Stray,Normal,Dog,Intact Male,2 years,Miniature Poodle/Cocker Spaniel,White


In [9]:
# Display the outcome frame after the column drop (pandas renders only the first and last rows).
outcome

Unnamed: 0,animal_id,name,datetime,date_of_birth,outcome_type,outcome_subtype,animal_type,sex_upon_outcome,age_upon_outcome,breed,color
0,A794011,Chunk,2019-05-08T18:20:00.000,2017-05-02T00:00:00.000,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,Gizmo,2018-07-18T16:02:00.000,2017-07-12T00:00:00.000,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,,2020-08-16T11:38:00.000,2019-08-16T00:00:00.000,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,Moose,2016-02-13T17:59:00.000,2015-10-08T00:00:00.000,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,,2014-03-18T11:47:00.000,2014-03-12T00:00:00.000,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby
...,...,...,...,...,...,...,...,...,...,...,...
157476,A892186,*Slate,2023-11-30T13:41:00.000,2021-11-02T00:00:00.000,Transfer,Partner,Cat,Intact Male,2 years,Domestic Shorthair Mix,Black/White
157477,A892197,*Volodymyr,2023-11-30T13:42:00.000,2021-11-02T00:00:00.000,Transfer,Partner,Cat,Intact Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
157478,A888494,"Luca ""Aka"" Shaq",2023-09-19T12:37:00.000,2022-03-07T00:00:00.000,Return to Owner,Field,Dog,Neutered Male,1 year,Pit Bull,Tan
157479,A893447,,2023-11-30T13:40:00.000,2021-11-21T00:00:00.000,Transfer,Partner,Cat,Unknown,2 years,Domestic Shorthair,Blue Tabby/Brown Tabby


In [10]:
# Order both frames by animal and then by event time, renumbering rows from 0.
# `datetime` is still an ISO-8601 string at this point, so it is sorted as text.
sort_keys = ["animal_id", "datetime"]
intake = intake.sort_values(sort_keys, ignore_index=True)
outcome = outcome.sort_values(sort_keys, ignore_index=True)

In [11]:
# Display the intake frame after sorting by animal_id and datetime.
intake

Unnamed: 0,animal_id,name,datetime,found_location,intake_type,intake_condition,animal_type,sex_upon_intake,age_upon_intake,breed,color
0,A006100,Scamp,2014-03-07T14:26:00.000,8700 Research in Austin (TX),Public Assist,Normal,Dog,Neutered Male,6 years,Spinone Italiano Mix,Yellow/White
1,A006100,Scamp,2014-12-19T10:21:00.000,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Neutered Male,7 years,Spinone Italiano Mix,Yellow/White
2,A006100,Scamp,2017-12-07T14:07:00.000,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Neutered Male,10 years,Spinone Italiano Mix,Yellow/White
3,A047759,Oreo,2014-04-02T15:55:00.000,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,10 years,Dachshund,Tricolor
4,A134067,Bandit,2013-11-16T09:02:00.000,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Neutered Male,16 years,Shetland Sheepdog,Brown/White
...,...,...,...,...,...,...,...,...,...,...,...
157526,A893921,,2023-11-30T12:25:00.000,Ridgepoint Dr in Austin (TX),Stray,Normal,Cat,Intact Male,3 months,Domestic Shorthair,Brown Tabby
157527,A893922,,2023-11-30T12:25:00.000,Ridgepoint Dr in Austin (TX),Stray,Normal,Cat,Intact Female,3 months,Domestic Shorthair,Brown Tabby
157528,A893925,,2023-11-30T12:49:00.000,7501 Bethune Avenue in Austin (TX),Stray,Normal,Cat,Intact Male,4 months,Domestic Shorthair,Blue Tabby
157529,A893926,,2023-11-30T12:27:00.000,2400 Wickersham Ln Unit #192 in Austin (TX),Stray,Normal,Dog,Intact Male,1 year,Siberian Husky/Great Pyrenees,White


In [12]:
# Display the outcome frame after sorting by animal_id and datetime.
outcome

Unnamed: 0,animal_id,name,datetime,date_of_birth,outcome_type,outcome_subtype,animal_type,sex_upon_outcome,age_upon_outcome,breed,color
0,A006100,Scamp,2014-03-08T17:10:00.000,2007-07-09T00:00:00.000,Return to Owner,,Dog,Neutered Male,6 years,Spinone Italiano Mix,Yellow/White
1,A006100,Scamp,2014-12-20T16:35:00.000,2007-07-09T00:00:00.000,Return to Owner,,Dog,Neutered Male,7 years,Spinone Italiano Mix,Yellow/White
2,A006100,Scamp,2017-12-07T00:00:00.000,2007-07-09T00:00:00.000,Return to Owner,,Dog,Neutered Male,10 years,Spinone Italiano Mix,Yellow/White
3,A047759,Oreo,2014-04-07T15:12:00.000,2004-04-02T00:00:00.000,Transfer,Partner,Dog,Neutered Male,10 years,Dachshund,Tricolor
4,A134067,Bandit,2013-11-16T11:54:00.000,1997-10-16T00:00:00.000,Return to Owner,,Dog,Neutered Male,16 years,Shetland Sheepdog,Brown/White
...,...,...,...,...,...,...,...,...,...,...,...
157453,A893776,,2023-11-30T08:50:00.000,2023-06-28T00:00:00.000,Transfer,Snr,Cat,Unknown,5 months,Domestic Longhair,Gray Tabby
157454,A893778,,2023-11-30T08:50:00.000,2021-11-28T00:00:00.000,Transfer,Snr,Cat,Unknown,2 years,Domestic Shorthair,Brown Tabby
157455,A893795,,2023-11-30T08:50:00.000,2023-07-28T00:00:00.000,Transfer,Snr,Cat,Unknown,4 months,Domestic Shorthair,Orange Tabby
157456,A893840,Luna,2023-11-30T12:30:00.000,2018-11-29T00:00:00.000,Return to Owner,,Dog,Intact Female,5 years,Pit Bull Mix,Brown/White


## Format datetime columns

In [13]:
# date_format(df, col, format) is a project helper imported from data_cleaning.
# Here it converts intake["datetime"]; the frame it returns is shown as the cell output.
# The result is not assigned, so later cells presumably rely on `intake` having
# been modified in place — confirm in data_cleaning.
date_format(intake, "datetime", "%Y-%m-%d %H:%M")

Unnamed: 0,animal_id,name,datetime,found_location,intake_type,intake_condition,animal_type,sex_upon_intake,age_upon_intake,breed,color
0,A006100,Scamp,2014-03-07 14:26:00,8700 Research in Austin (TX),Public Assist,Normal,Dog,Neutered Male,6 years,Spinone Italiano Mix,Yellow/White
1,A006100,Scamp,2014-12-19 10:21:00,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Neutered Male,7 years,Spinone Italiano Mix,Yellow/White
2,A006100,Scamp,2017-12-07 14:07:00,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Neutered Male,10 years,Spinone Italiano Mix,Yellow/White
3,A047759,Oreo,2014-04-02 15:55:00,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,10 years,Dachshund,Tricolor
4,A134067,Bandit,2013-11-16 09:02:00,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Neutered Male,16 years,Shetland Sheepdog,Brown/White
...,...,...,...,...,...,...,...,...,...,...,...
157526,A893921,,2023-11-30 12:25:00,Ridgepoint Dr in Austin (TX),Stray,Normal,Cat,Intact Male,3 months,Domestic Shorthair,Brown Tabby
157527,A893922,,2023-11-30 12:25:00,Ridgepoint Dr in Austin (TX),Stray,Normal,Cat,Intact Female,3 months,Domestic Shorthair,Brown Tabby
157528,A893925,,2023-11-30 12:49:00,7501 Bethune Avenue in Austin (TX),Stray,Normal,Cat,Intact Male,4 months,Domestic Shorthair,Blue Tabby
157529,A893926,,2023-11-30 12:27:00,2400 Wickersham Ln Unit #192 in Austin (TX),Stray,Normal,Dog,Intact Male,1 year,Siberian Husky/Great Pyrenees,White


In [14]:
# Convert both date columns of the outcome frame with the project helper.
# Only the second call's return value is rendered as the cell output; it already
# shows `datetime` converted, so the helper appears to modify the frame in place
# — confirm in data_cleaning.
date_format(outcome, "datetime", "%Y-%m-%d %H:%M")
date_format(outcome, "date_of_birth", "%Y-%m-%d")

Unnamed: 0,animal_id,name,datetime,date_of_birth,outcome_type,outcome_subtype,animal_type,sex_upon_outcome,age_upon_outcome,breed,color
0,A006100,Scamp,2014-03-08 17:10:00,2007-07-09,Return to Owner,,Dog,Neutered Male,6 years,Spinone Italiano Mix,Yellow/White
1,A006100,Scamp,2014-12-20 16:35:00,2007-07-09,Return to Owner,,Dog,Neutered Male,7 years,Spinone Italiano Mix,Yellow/White
2,A006100,Scamp,2017-12-07 00:00:00,2007-07-09,Return to Owner,,Dog,Neutered Male,10 years,Spinone Italiano Mix,Yellow/White
3,A047759,Oreo,2014-04-07 15:12:00,2004-04-02,Transfer,Partner,Dog,Neutered Male,10 years,Dachshund,Tricolor
4,A134067,Bandit,2013-11-16 11:54:00,1997-10-16,Return to Owner,,Dog,Neutered Male,16 years,Shetland Sheepdog,Brown/White
...,...,...,...,...,...,...,...,...,...,...,...
157453,A893776,,2023-11-30 08:50:00,2023-06-28,Transfer,Snr,Cat,Unknown,5 months,Domestic Longhair,Gray Tabby
157454,A893778,,2023-11-30 08:50:00,2021-11-28,Transfer,Snr,Cat,Unknown,2 years,Domestic Shorthair,Brown Tabby
157455,A893795,,2023-11-30 08:50:00,2023-07-28,Transfer,Snr,Cat,Unknown,4 months,Domestic Shorthair,Orange Tabby
157456,A893840,Luna,2023-11-30 12:30:00,2018-11-29,Return to Owner,,Dog,Intact Female,5 years,Pit Bull Mix,Brown/White


# Merge dataframes

In [15]:
# List the intake columns to compare against the outcome columns before merging.
intake.columns

Index(['animal_id', 'name', 'datetime', 'found_location', 'intake_type',
       'intake_condition', 'animal_type', 'sex_upon_intake', 'age_upon_intake',
       'breed', 'color'],
      dtype='object')

In [16]:
# List the outcome columns to spot the ones that overlap with intake.
outcome.columns

Index(['animal_id', 'name', 'datetime', 'date_of_birth', 'outcome_type',
       'outcome_subtype', 'animal_type', 'sex_upon_outcome',
       'age_upon_outcome', 'breed', 'color'],
      dtype='object')

# Drop duplicated columns in outcome data

In [17]:
# Remove the outcome columns whose information is taken from the intake frame.
# The result is rebound and missing labels are ignored so the cell can be
# re-run; an in-place drop raises KeyError once the columns are gone.
# NOTE(review): sex_upon_outcome is not a literal duplicate of sex_upon_intake
# — confirm that discarding it is intended.
outcome = outcome.drop(
    columns=["name", "animal_type", "sex_upon_outcome", "breed", "color"],
    errors="ignore",
)

In [18]:
# Combine the intake and outcome records with the project helper from data_cleaning.
# The displayed result has `datetime_intake` / `datetime_outcome` columns, and
# `datetime_outcome` appears twice; the duplicate column is removed in the next cell.
# Intakes with no matching outcome show NaT/NaN in the outcome columns.
intake_n_outcome = merge_intake_n_outcome(intake, outcome)
intake_n_outcome

Unnamed: 0,animal_id,name,datetime_intake,found_location,intake_type,intake_condition,animal_type,sex_upon_intake,age_upon_intake,breed,color,datetime_outcome,datetime_outcome.1,date_of_birth,outcome_type,outcome_subtype,age_upon_outcome
0,A006100,Scamp,2014-03-07 14:26:00,8700 Research in Austin (TX),Public Assist,Normal,Dog,Neutered Male,6 years,Spinone Italiano Mix,Yellow/White,2014-03-08 17:10:00,2014-03-08 17:10:00,2007-07-09,Return to Owner,,6 years
1,A006100,Scamp,2014-12-19 10:21:00,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Neutered Male,7 years,Spinone Italiano Mix,Yellow/White,2014-12-20 16:35:00,2014-12-20 16:35:00,2007-07-09,Return to Owner,,7 years
2,A006100,Scamp,2017-12-07 14:07:00,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Neutered Male,10 years,Spinone Italiano Mix,Yellow/White,2017-12-07 00:00:00,2017-12-07 00:00:00,2007-07-09,Return to Owner,,10 years
3,A047759,Oreo,2014-04-02 15:55:00,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,10 years,Dachshund,Tricolor,2014-04-07 15:12:00,2014-04-07 15:12:00,2004-04-02,Transfer,Partner,10 years
4,A134067,Bandit,2013-11-16 09:02:00,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Neutered Male,16 years,Shetland Sheepdog,Brown/White,2013-11-16 11:54:00,2013-11-16 11:54:00,1997-10-16,Return to Owner,,16 years
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157527,A893921,,2023-11-30 12:25:00,Ridgepoint Dr in Austin (TX),Stray,Normal,Cat,Intact Male,3 months,Domestic Shorthair,Brown Tabby,NaT,NaT,NaT,,,
157528,A893922,,2023-11-30 12:25:00,Ridgepoint Dr in Austin (TX),Stray,Normal,Cat,Intact Female,3 months,Domestic Shorthair,Brown Tabby,NaT,NaT,NaT,,,
157529,A893925,,2023-11-30 12:49:00,7501 Bethune Avenue in Austin (TX),Stray,Normal,Cat,Intact Male,4 months,Domestic Shorthair,Blue Tabby,NaT,NaT,NaT,,,
157530,A893926,,2023-11-30 12:27:00,2400 Wickersham Ln Unit #192 in Austin (TX),Stray,Normal,Dog,Intact Male,1 year,Siberian Husky/Great Pyrenees,White,NaT,NaT,NaT,,,


In certain cases, an animal's information is entered more than once because basic details such as `intake_type` or `intake_condition` were updated. Dropping only fully identical rows at the outset is therefore not sufficient. Instead, rows are treated as duplicates when they share the same `animal_id`, `datetime_intake`, and `datetime_outcome` (renamed to `intake_datetime` and `outcome_datetime` further below); only the first such row is kept.

In [19]:
# Treat rows with the same animal, intake time and outcome time as one record
# and keep the first of them.
dedup_keys = ["animal_id", "datetime_intake", "datetime_outcome"]
intake_n_outcome = intake_n_outcome.drop_duplicates(subset=dedup_keys)

# Keep only the first column for every repeated column label.
first_occurrence = ~intake_n_outcome.columns.duplicated()
intake_n_outcome = intake_n_outcome.loc[:, first_occurrence]
intake_n_outcome

Unnamed: 0,animal_id,name,datetime_intake,found_location,intake_type,intake_condition,animal_type,sex_upon_intake,age_upon_intake,breed,color,datetime_outcome,date_of_birth,outcome_type,outcome_subtype,age_upon_outcome
0,A006100,Scamp,2014-03-07 14:26:00,8700 Research in Austin (TX),Public Assist,Normal,Dog,Neutered Male,6 years,Spinone Italiano Mix,Yellow/White,2014-03-08 17:10:00,2007-07-09,Return to Owner,,6 years
1,A006100,Scamp,2014-12-19 10:21:00,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Neutered Male,7 years,Spinone Italiano Mix,Yellow/White,2014-12-20 16:35:00,2007-07-09,Return to Owner,,7 years
2,A006100,Scamp,2017-12-07 14:07:00,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Neutered Male,10 years,Spinone Italiano Mix,Yellow/White,2017-12-07 00:00:00,2007-07-09,Return to Owner,,10 years
3,A047759,Oreo,2014-04-02 15:55:00,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,10 years,Dachshund,Tricolor,2014-04-07 15:12:00,2004-04-02,Transfer,Partner,10 years
4,A134067,Bandit,2013-11-16 09:02:00,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Neutered Male,16 years,Shetland Sheepdog,Brown/White,2013-11-16 11:54:00,1997-10-16,Return to Owner,,16 years
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157527,A893921,,2023-11-30 12:25:00,Ridgepoint Dr in Austin (TX),Stray,Normal,Cat,Intact Male,3 months,Domestic Shorthair,Brown Tabby,NaT,NaT,,,
157528,A893922,,2023-11-30 12:25:00,Ridgepoint Dr in Austin (TX),Stray,Normal,Cat,Intact Female,3 months,Domestic Shorthair,Brown Tabby,NaT,NaT,,,
157529,A893925,,2023-11-30 12:49:00,7501 Bethune Avenue in Austin (TX),Stray,Normal,Cat,Intact Male,4 months,Domestic Shorthair,Blue Tabby,NaT,NaT,,,
157530,A893926,,2023-11-30 12:27:00,2400 Wickersham Ln Unit #192 in Austin (TX),Stray,Normal,Dog,Intact Male,1 year,Siberian Husky/Great Pyrenees,White,NaT,NaT,,,


In [20]:
# List the merged frame's columns after removing the repeated column label.
intake_n_outcome.columns

Index(['animal_id', 'name', 'datetime_intake', 'found_location', 'intake_type',
       'intake_condition', 'animal_type', 'sex_upon_intake', 'age_upon_intake',
       'breed', 'color', 'datetime_outcome', 'date_of_birth', 'outcome_type',
       'outcome_subtype', 'age_upon_outcome'],
      dtype='object')

In [21]:
# Maps each current column name to its final name. The order of the entries is
# also the column order of the resulting frame.
column_order = {
    # identity of the animal
    "animal_id": "animal_id",
    "name": "name",
    "animal_type": "animal_type",
    "sex_upon_intake": "sex",
    "breed": "breed",
    "color": "color",
    "date_of_birth": "date_of_birth",
    # where and when it came in / left
    "found_location": "found_location",
    "datetime_intake": "intake_datetime",
    "age_upon_intake": "age_upon_intake",
    "datetime_outcome": "outcome_datetime",
    "age_upon_outcome": "age_upon_outcome",
    # categorical intake / outcome details
    "intake_type": "intake_type",
    "intake_condition": "intake_condition",
    "outcome_type": "outcome_type",
    "outcome_subtype": "outcome_subtype",
}
renamed = intake_n_outcome.rename(columns=column_order)
intake_n_outcome = renamed[list(column_order.values())]
intake_n_outcome

Unnamed: 0,animal_id,name,animal_type,sex,breed,color,date_of_birth,found_location,intake_datetime,age_upon_intake,outcome_datetime,age_upon_outcome,intake_type,intake_condition,outcome_type,outcome_subtype
0,A006100,Scamp,Dog,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,8700 Research in Austin (TX),2014-03-07 14:26:00,6 years,2014-03-08 17:10:00,6 years,Public Assist,Normal,Return to Owner,
1,A006100,Scamp,Dog,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,8700 Research Blvd in Austin (TX),2014-12-19 10:21:00,7 years,2014-12-20 16:35:00,7 years,Public Assist,Normal,Return to Owner,
2,A006100,Scamp,Dog,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,Colony Creek And Hunters Trace in Austin (TX),2017-12-07 14:07:00,10 years,2017-12-07 00:00:00,10 years,Stray,Normal,Return to Owner,
3,A047759,Oreo,Dog,Neutered Male,Dachshund,Tricolor,2004-04-02,Austin (TX),2014-04-02 15:55:00,10 years,2014-04-07 15:12:00,10 years,Owner Surrender,Normal,Transfer,Partner
4,A134067,Bandit,Dog,Neutered Male,Shetland Sheepdog,Brown/White,1997-10-16,12034 Research Blvd in Austin (TX),2013-11-16 09:02:00,16 years,2013-11-16 11:54:00,16 years,Public Assist,Injured,Return to Owner,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157527,A893921,,Cat,Intact Male,Domestic Shorthair,Brown Tabby,NaT,Ridgepoint Dr in Austin (TX),2023-11-30 12:25:00,3 months,NaT,,Stray,Normal,,
157528,A893922,,Cat,Intact Female,Domestic Shorthair,Brown Tabby,NaT,Ridgepoint Dr in Austin (TX),2023-11-30 12:25:00,3 months,NaT,,Stray,Normal,,
157529,A893925,,Cat,Intact Male,Domestic Shorthair,Blue Tabby,NaT,7501 Bethune Avenue in Austin (TX),2023-11-30 12:49:00,4 months,NaT,,Stray,Normal,,
157530,A893926,,Dog,Intact Male,Siberian Husky/Great Pyrenees,White,NaT,2400 Wickersham Ln Unit #192 in Austin (TX),2023-11-30 12:27:00,1 year,NaT,,Stray,Normal,,


# Check final dataset

In [22]:
# Check column dtypes; the three date columns should be datetime64 before computing ages.
intake_n_outcome.dtypes

animal_id                   object
name                        object
animal_type                 object
sex                         object
breed                       object
color                       object
date_of_birth       datetime64[ns]
found_location              object
intake_datetime     datetime64[ns]
age_upon_intake             object
outcome_datetime    datetime64[ns]
age_upon_outcome            object
intake_type                 object
intake_condition            object
outcome_type                object
outcome_subtype             object
dtype: object

In [23]:
# Derive numeric columns with the project helper from data_cleaning; the frame
# displayed afterwards contains `age_upon_intake(years)`, `age_upon_outcome(years)`
# and `duration(days)`.
# Only the last call's return value (the duration Series) is shown as cell output.
# NOTE(review): the output contains negative durations (e.g. -1.0 where the
# outcome timestamp precedes the intake timestamp) — confirm how these should be handled.
calculate_age_delta(intake_n_outcome, start="date_of_birth", end="intake_datetime", unit="years", col_suffix="intake")
calculate_age_delta(intake_n_outcome, start="date_of_birth", end="outcome_datetime", unit="years", col_suffix="outcome")
calculate_age_delta(intake_n_outcome, start="intake_datetime", end="outcome_datetime", unit="days")

0         1.0
1         1.0
2        -1.0
3         4.0
4         0.0
         ... 
157527    NaN
157528    NaN
157529    NaN
157530    NaN
157531    NaN
Name: duration(days), Length: 157531, dtype: float64

In [24]:
# Display the frame with the derived age and duration columns appended.
intake_n_outcome

Unnamed: 0,animal_id,name,animal_type,sex,breed,color,date_of_birth,found_location,intake_datetime,age_upon_intake,outcome_datetime,age_upon_outcome,intake_type,intake_condition,outcome_type,outcome_subtype,age_upon_intake(years),age_upon_outcome(years),duration(days)
0,A006100,Scamp,Dog,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,8700 Research in Austin (TX),2014-03-07 14:26:00,6 years,2014-03-08 17:10:00,6 years,Public Assist,Normal,Return to Owner,,6.7,6.7,1.0
1,A006100,Scamp,Dog,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,8700 Research Blvd in Austin (TX),2014-12-19 10:21:00,7 years,2014-12-20 16:35:00,7 years,Public Assist,Normal,Return to Owner,,7.4,7.4,1.0
2,A006100,Scamp,Dog,Neutered Male,Spinone Italiano Mix,Yellow/White,2007-07-09,Colony Creek And Hunters Trace in Austin (TX),2017-12-07 14:07:00,10 years,2017-12-07 00:00:00,10 years,Stray,Normal,Return to Owner,,10.4,10.4,-1.0
3,A047759,Oreo,Dog,Neutered Male,Dachshund,Tricolor,2004-04-02,Austin (TX),2014-04-02 15:55:00,10 years,2014-04-07 15:12:00,10 years,Owner Surrender,Normal,Transfer,Partner,10.0,10.0,4.0
4,A134067,Bandit,Dog,Neutered Male,Shetland Sheepdog,Brown/White,1997-10-16,12034 Research Blvd in Austin (TX),2013-11-16 09:02:00,16 years,2013-11-16 11:54:00,16 years,Public Assist,Injured,Return to Owner,,16.1,16.1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157527,A893921,,Cat,Intact Male,Domestic Shorthair,Brown Tabby,NaT,Ridgepoint Dr in Austin (TX),2023-11-30 12:25:00,3 months,NaT,,Stray,Normal,,,,,
157528,A893922,,Cat,Intact Female,Domestic Shorthair,Brown Tabby,NaT,Ridgepoint Dr in Austin (TX),2023-11-30 12:25:00,3 months,NaT,,Stray,Normal,,,,,
157529,A893925,,Cat,Intact Male,Domestic Shorthair,Blue Tabby,NaT,7501 Bethune Avenue in Austin (TX),2023-11-30 12:49:00,4 months,NaT,,Stray,Normal,,,,,
157530,A893926,,Dog,Intact Male,Siberian Husky/Great Pyrenees,White,NaT,2400 Wickersham Ln Unit #192 in Austin (TX),2023-11-30 12:27:00,1 year,NaT,,Stray,Normal,,,,,


# Check columns

In [25]:
# Check dtypes again; the derived age and duration columns should be float64.
intake_n_outcome.dtypes

animal_id                          object
name                               object
animal_type                        object
sex                                object
breed                              object
color                              object
date_of_birth              datetime64[ns]
found_location                     object
intake_datetime            datetime64[ns]
age_upon_intake                    object
outcome_datetime           datetime64[ns]
age_upon_outcome                   object
intake_type                        object
intake_condition                   object
outcome_type                       object
outcome_subtype                    object
age_upon_intake(years)            float64
age_upon_outcome(years)           float64
duration(days)                    float64
dtype: object

In [26]:
# List the final set of columns.
intake_n_outcome.columns

Index(['animal_id', 'name', 'animal_type', 'sex', 'breed', 'color',
       'date_of_birth', 'found_location', 'intake_datetime', 'age_upon_intake',
       'outcome_datetime', 'age_upon_outcome', 'intake_type',
       'intake_condition', 'outcome_type', 'outcome_subtype',
       'age_upon_intake(years)', 'age_upon_outcome(years)', 'duration(days)'],
      dtype='object')

In [27]:
# Count records per animal type.
intake_n_outcome.animal_type.value_counts()

animal_type
Dog          87133
Cat          61332
Other         8261
Bird           779
Livestock       26
Name: count, dtype: int64

In [28]:
# Count records per breed label (free-text labels, so there are many distinct values).
intake_n_outcome.breed.value_counts()

breed
Domestic Shorthair Mix             33276
Domestic Shorthair                 17559
Pit Bull Mix                        9638
Labrador Retriever Mix              8167
Chihuahua Shorthair Mix             6737
                                   ...  
Pit Bull/American Foxhound             1
Pit Bull/Dogue De Bordeaux             1
English Springer Spaniel/Beagle        1
Goose Mix                              1
Domestic Shorthair/Oriental Sh         1
Name: count, Length: 2887, dtype: int64

In [None]:
# Ad-hoc check of calculate_age_delta on a single hand-made row.
test = {
    "date_of_birth": pd.to_datetime(["2014-04-02 15:55:00"]),
    "date_leave": pd.to_datetime(["2015-04-27 14:45:00"]),
}
test_df = pd.DataFrame(test)
age_at_leaving = calculate_age_delta(
    test_df, "date_of_birth", "date_leave", unit="years", col_suffix="leaving"
)
print(age_at_leaving)

In [None]:
# Ad-hoc check of merge_intake_n_outcome on the small CSV fixtures.
# Both paths use the same lowercase "test_data" directory so the cell also works
# on case-sensitive filesystems — presumably that is the name on disk; confirm.
test_intake = pd.read_csv("test_data/test_intake.csv")
# read_csv already returns a DataFrame; the wrapper only creates a second frame
# object, so the raw read stays available under its own name for the cells below.
test_intake_data = pd.DataFrame(test_intake)
test_outcome = pd.read_csv("test_data/test_outcome.csv")
test_outcome_data = pd.DataFrame(test_outcome)
merge_intake_n_outcome(test_intake_data, test_outcome_data)

In [None]:
# Inspect the raw `datetime` column of the intake fixture.
print(test_intake['datetime'])

In [None]:
# List the columns of the intake fixture.
test_intake.columns