In [1]:
# Imports
import pandas as pd
import numpy as np

In [2]:
# Import intake CSV data
intake_df = pd.read_csv("Resources/Austin_Animal_Center_Intakes.csv")
intake_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color
0,A786884,*Brock,01/03/2019 04:19:00 PM,01/03/2019 04:19:00 PM,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor
1,A706918,Belle,07/05/2015 12:59:00 PM,07/05/2015 12:59:00 PM,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver
2,A724273,Runster,04/14/2016 06:43:00 PM,04/14/2016 06:43:00 PM,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White
3,A665644,,10/21/2013 07:59:00 AM,10/21/2013 07:59:00 AM,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico
4,A682524,Rio,06/29/2014 10:38:00 AM,06/29/2014 10:38:00 AM,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray


In [3]:
# Import outcome CSV data
outcome_df = pd.read_csv("Resources/Austin_Animal_Center_Outcomes.csv")
outcome_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A794011,Chunk,05/08/2019 06:20:00 PM,05/08/2019 06:20:00 PM,05/02/2017,Rto-Adopt,,Cat,Neutered Male,2 years,Domestic Shorthair Mix,Brown Tabby/White
1,A776359,Gizmo,07/18/2018 04:02:00 PM,07/18/2018 04:02:00 PM,07/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
2,A821648,,08/16/2020 11:38:00 AM,08/16/2020 11:38:00 AM,08/16/2019,Euthanasia,,Other,Unknown,1 year,Raccoon,Gray
3,A720371,Moose,02/13/2016 05:59:00 PM,02/13/2016 05:59:00 PM,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
4,A674754,,03/18/2014 11:47:00 AM,03/18/2014 11:47:00 AM,03/12/2014,Transfer,Partner,Cat,Intact Male,6 days,Domestic Shorthair Mix,Orange Tabby


In [4]:
data = [intake_df["Animal ID"], intake_df["Age upon Intake"]]
headers = ["ID", "Age"]
intake_age_df = pd.concat(data, axis=1, keys=headers)
intake_age_df.head()

Unnamed: 0,ID,Age
0,A786884,2 years
1,A706918,8 years
2,A724273,11 months
3,A665644,4 weeks
4,A682524,4 years


In [5]:
# new data frame with split value columns
new = intake_age_df["Age"].str.split(" ", n = 1, expand = True)

# making separate columns for the number and string from new data frame
intake_age_df["Age_Value"]= new[0]
intake_age_df["Age_Unit"]= new[1]

intake_age_df.head()

Unnamed: 0,ID,Age,Age_Value,Age_Unit
0,A786884,2 years,2,years
1,A706918,8 years,8,years
2,A724273,11 months,11,months
3,A665644,4 weeks,4,weeks
4,A682524,4 years,4,years


In [6]:
unit_counts = intake_age_df.Age_Unit.value_counts()
unit_counts

years     53682
months    28405
year      23251
month     13349
weeks     13057
days       2138
week       1110
day         716
Name: Age_Unit, dtype: int64

In [7]:
# convert ages in months to ages in years

for row in intake_age_df.index:
    if((intake_age_df["Age_Unit"].values[row] == "months") | (intake_age_df["Age_Unit"].values[row] == "month")):
        intake_age_df["Age_Value"].values[row] = float(intake_age_df["Age_Value"].values[row])/12
    if((intake_age_df["Age_Unit"].values[row] == "weeks") | (intake_age_df["Age_Unit"].values[row] == "week")):
        intake_age_df["Age_Value"].values[row] = float(intake_age_df["Age_Value"].values[row])/52
    if((intake_age_df["Age_Unit"].values[row] == "days") | (intake_age_df["Age_Unit"].values[row] == "day")):
        intake_age_df["Age_Value"].values[row] = float(intake_age_df["Age_Value"].values[row])/365
    if((intake_age_df["Age_Unit"].values[row] == "years") | (intake_age_df["Age_Unit"].values[row] == "year")):
        intake_age_df["Age_Value"].values[row] = float(intake_age_df["Age_Value"].values[row])
        
intake_age_df.head()

Unnamed: 0,ID,Age,Age_Value,Age_Unit
0,A786884,2 years,2.0,years
1,A706918,8 years,8.0,years
2,A724273,11 months,0.916667,months
3,A665644,4 weeks,0.0769231,weeks
4,A682524,4 years,4.0,years


In [8]:
# put converted age back into intake dataframe
intake_df["Age_in_Years"] = intake_age_df["Age_Value"]
intake_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Age_in_Years
0,A786884,*Brock,01/03/2019 04:19:00 PM,01/03/2019 04:19:00 PM,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,2.0
1,A706918,Belle,07/05/2015 12:59:00 PM,07/05/2015 12:59:00 PM,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,8.0
2,A724273,Runster,04/14/2016 06:43:00 PM,04/14/2016 06:43:00 PM,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,0.916667
3,A665644,,10/21/2013 07:59:00 AM,10/21/2013 07:59:00 AM,Austin (TX),Stray,Sick,Cat,Intact Female,4 weeks,Domestic Shorthair Mix,Calico,0.0769231
4,A682524,Rio,06/29/2014 10:38:00 AM,06/29/2014 10:38:00 AM,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray,4.0


In [9]:
intake_df = intake_df.loc[intake_df['Animal Type'] == 'Dog']
print(intake_df.shape)
intake_df = intake_df.reset_index(drop=True)
intake_df.head(10)

(76356, 13)


Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Age_in_Years
0,A786884,*Brock,01/03/2019 04:19:00 PM,01/03/2019 04:19:00 PM,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,2.0
1,A706918,Belle,07/05/2015 12:59:00 PM,07/05/2015 12:59:00 PM,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,8.0
2,A724273,Runster,04/14/2016 06:43:00 PM,04/14/2016 06:43:00 PM,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,0.916667
3,A682524,Rio,06/29/2014 10:38:00 AM,06/29/2014 10:38:00 AM,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray,4.0
4,A743852,Odin,02/18/2017 12:46:00 PM,02/18/2017 12:46:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,2 years,Labrador Retriever Mix,Chocolate,2.0
5,A635072,Beowulf,04/16/2019 09:53:00 AM,04/16/2019 09:53:00 AM,415 East Mary Street in Austin (TX),Public Assist,Normal,Dog,Neutered Male,6 years,Great Dane Mix,Black,6.0
6,A708452,Mumble,07/30/2015 02:37:00 PM,07/30/2015 02:37:00 PM,Austin (TX),Public Assist,Normal,Dog,Intact Male,2 years,Labrador Retriever Mix,Black/White,2.0
7,A760053,,10/11/2017 03:46:00 PM,10/11/2017 03:46:00 PM,8800 South First Street in Austin (TX),Stray,Normal,Dog,Intact Male,2 years,Chihuahua Shorthair,White/Tan,2.0
8,A707375,*Candy Cane,07/11/2015 06:19:00 PM,07/11/2015 06:19:00 PM,Galilee Court And Damita Jo Dr in Manor (TX),Stray,Normal,Dog,Intact Female,5 months,Pit Bull,Brown/White,0.416667
9,A696408,*Pearl,02/04/2015 12:58:00 PM,02/04/2015 12:58:00 PM,9705 Thaxton in Austin (TX),Stray,Normal,Dog,Intact Female,2 years,Chihuahua Shorthair,Tricolor,2.0


In [10]:
breed_counts = intake_df.Breed.value_counts()
breed_counts

Pit Bull Mix                                         8854
Labrador Retriever Mix                               7313
Chihuahua Shorthair Mix                              6459
German Shepherd Mix                                  3273
Pit Bull                                             1837
Australian Cattle Dog Mix                            1627
Chihuahua Shorthair                                  1301
Labrador Retriever                                   1291
German Shepherd                                      1137
Dachshund Mix                                        1084
Boxer Mix                                            1035
Border Collie Mix                                    1000
Miniature Poodle Mix                                  871
Siberian Husky Mix                                    751
Australian Shepherd Mix                               737
Catahoula Mix                                         736
Staffordshire Mix                                     716
Rat Terrier Mi

In [11]:
data2 = [intake_df["Animal ID"], intake_df["Breed"]]
headers = ["ID", "Breed"]
intake_breed_df = pd.concat(data2, axis=1, keys=headers)
intake_breed_df.head()

Unnamed: 0,ID,Breed
0,A786884,Beagle Mix
1,A706918,English Springer Spaniel
2,A724273,Basenji Mix
3,A682524,Doberman Pinsch/Australian Cattle Dog
4,A743852,Labrador Retriever Mix


In [12]:
# new data frame with split value columns
new2 = intake_breed_df["Breed"].str.split(" ", expand = True)

# making separate columns for the number and string from new data frame
intake_breed_df["Breed1"]= new2[0]
intake_breed_df["Breed2"]= new2[1]
intake_breed_df["Breed3"]= new2[2]
intake_breed_df["Breed4"]= new2[3]
intake_breed_df["Breed5"]= new2[4]
intake_breed_df['Breed5'] = intake_breed_df['Breed5'].fillna(0)
intake_breed_df['Breed4'] = intake_breed_df['Breed4'].fillna(0)
intake_breed_df['Breed3'] = intake_breed_df['Breed3'].fillna(0)
intake_breed_df['Breed2'] = intake_breed_df['Breed2'].fillna(0)
intake_breed_df.head()

Unnamed: 0,ID,Breed,Breed1,Breed2,Breed3,Breed4,Breed5
0,A786884,Beagle Mix,Beagle,Mix,0,0,0
1,A706918,English Springer Spaniel,English,Springer,Spaniel,0,0
2,A724273,Basenji Mix,Basenji,Mix,0,0,0
3,A682524,Doberman Pinsch/Australian Cattle Dog,Doberman,Pinsch/Australian,Cattle,Dog,0
4,A743852,Labrador Retriever Mix,Labrador,Retriever,Mix,0,0


In [13]:
# convert ages in months to ages in years
for row in (intake_breed_df.index):
    if((intake_breed_df["Breed2"].values[row] == "Mix") | (intake_breed_df["Breed2"].values[row] == 0)):
        intake_breed_df["Breed2"].values[row] = ""
    if((intake_breed_df["Breed3"].values[row] == "Mix") | (intake_breed_df["Breed3"].values[row] == 0)):
        intake_breed_df["Breed3"].values[row] = ""
    if((intake_breed_df["Breed4"].values[row] == "Mix") | (intake_breed_df["Breed4"].values[row] == 0)):
        intake_breed_df["Breed4"].values[row] = ""
    if((intake_breed_df["Breed5"].values[row] == "Mix") | (intake_breed_df["Breed5"].values[row] == 0)):
        intake_breed_df["Breed5"].values[row] = ""

        
intake_breed_df.head()


Unnamed: 0,ID,Breed,Breed1,Breed2,Breed3,Breed4,Breed5
0,A786884,Beagle Mix,Beagle,,,,
1,A706918,English Springer Spaniel,English,Springer,Spaniel,,
2,A724273,Basenji Mix,Basenji,,,,
3,A682524,Doberman Pinsch/Australian Cattle Dog,Doberman,Pinsch/Australian,Cattle,Dog,
4,A743852,Labrador Retriever Mix,Labrador,Retriever,,,


In [14]:
intake_breed_df["updated"]=intake_breed_df["Breed1"]+' '+intake_breed_df["Breed2"]+' '+intake_breed_df["Breed3"]+' '+intake_breed_df["Breed4"]+' '+intake_breed_df["Breed5"]
intake_breed_df.head()


Unnamed: 0,ID,Breed,Breed1,Breed2,Breed3,Breed4,Breed5,updated
0,A786884,Beagle Mix,Beagle,,,,,Beagle
1,A706918,English Springer Spaniel,English,Springer,Spaniel,,,English Springer Spaniel
2,A724273,Basenji Mix,Basenji,,,,,Basenji
3,A682524,Doberman Pinsch/Australian Cattle Dog,Doberman,Pinsch/Australian,Cattle,Dog,,Doberman Pinsch/Australian Cattle Dog
4,A743852,Labrador Retriever Mix,Labrador,Retriever,,,,Labrador Retriever


In [15]:
data3 = [intake_df["Animal ID"], intake_df["Sex upon Intake"]]
headers = ["ID", "Intake_Sex"]
intake_sex_df = pd.concat(data3, axis=1, keys=headers)
intake_sex_df.head()

Unnamed: 0,ID,Intake_Sex
0,A786884,Neutered Male
1,A706918,Spayed Female
2,A724273,Intact Male
3,A682524,Neutered Male
4,A743852,Neutered Male


In [16]:
# new data frame with split value columns
new = intake_sex_df["Intake_Sex"].str.split(" ", n = 1, expand = True)

# making separate columns for the classifiers
intake_sex_df["Intactness"]= new[0]
intake_sex_df["Sex"]= new[1]

intake_sex_df.head()

Unnamed: 0,ID,Intake_Sex,Intactness,Sex
0,A786884,Neutered Male,Neutered,Male
1,A706918,Spayed Female,Spayed,Female
2,A724273,Intact Male,Intact,Male
3,A682524,Neutered Male,Neutered,Male
4,A743852,Neutered Male,Neutered,Male


In [17]:
# put converted intactness and Sex back into intake dataframe
intake_df["Intactness"] = intake_sex_df["Intactness"]
intake_df["Sex"] = intake_sex_df["Sex"]
intake_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Age_in_Years,Intactness,Sex
0,A786884,*Brock,01/03/2019 04:19:00 PM,01/03/2019 04:19:00 PM,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,2.0,Neutered,Male
1,A706918,Belle,07/05/2015 12:59:00 PM,07/05/2015 12:59:00 PM,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,8.0,Spayed,Female
2,A724273,Runster,04/14/2016 06:43:00 PM,04/14/2016 06:43:00 PM,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,0.916667,Intact,Male
3,A682524,Rio,06/29/2014 10:38:00 AM,06/29/2014 10:38:00 AM,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray,4.0,Neutered,Male
4,A743852,Odin,02/18/2017 12:46:00 PM,02/18/2017 12:46:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,2 years,Labrador Retriever Mix,Chocolate,2.0,Neutered,Male


In [18]:
bin_labels_age = ['Puppy', 'Young', 'Adult', 'Senior']
intake_df['Age'] = pd.cut(intake_df['Age_in_Years'],[0, 1, 3, 7, 20],labels=bin_labels_age)
intake_df.head()

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Age_in_Years,Intactness,Sex,Age
0,A786884,*Brock,01/03/2019 04:19:00 PM,01/03/2019 04:19:00 PM,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,2.0,Neutered,Male,Young
1,A706918,Belle,07/05/2015 12:59:00 PM,07/05/2015 12:59:00 PM,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,8.0,Spayed,Female,Senior
2,A724273,Runster,04/14/2016 06:43:00 PM,04/14/2016 06:43:00 PM,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,0.916667,Intact,Male,Puppy
3,A682524,Rio,06/29/2014 10:38:00 AM,06/29/2014 10:38:00 AM,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray,4.0,Neutered,Male,Adult
4,A743852,Odin,02/18/2017 12:46:00 PM,02/18/2017 12:46:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,2 years,Labrador Retriever Mix,Chocolate,2.0,Neutered,Male,Young


In [19]:
# create new column for breed type

intake_df["Breed_Type"] = pd.np.where(intake_df.Breed.str.contains("Mix"), "Mix",pd.np.where(intake_df.Breed.str.contains("/"), "Mix", "Pure"))
       
intake_df

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type
0,A786884,*Brock,01/03/2019 04:19:00 PM,01/03/2019 04:19:00 PM,2501 Magin Meadow Dr in Austin (TX),Stray,Normal,Dog,Neutered Male,2 years,Beagle Mix,Tricolor,2,Neutered,Male,Young,Mix
1,A706918,Belle,07/05/2015 12:59:00 PM,07/05/2015 12:59:00 PM,9409 Bluegrass Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,8 years,English Springer Spaniel,White/Liver,8,Spayed,Female,Senior,Pure
2,A724273,Runster,04/14/2016 06:43:00 PM,04/14/2016 06:43:00 PM,2818 Palomino Trail in Austin (TX),Stray,Normal,Dog,Intact Male,11 months,Basenji Mix,Sable/White,0.916667,Intact,Male,Puppy,Mix
3,A682524,Rio,06/29/2014 10:38:00 AM,06/29/2014 10:38:00 AM,800 Grove Blvd in Austin (TX),Stray,Normal,Dog,Neutered Male,4 years,Doberman Pinsch/Australian Cattle Dog,Tan/Gray,4,Neutered,Male,Adult,Mix
4,A743852,Odin,02/18/2017 12:46:00 PM,02/18/2017 12:46:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,2 years,Labrador Retriever Mix,Chocolate,2,Neutered,Male,Young,Mix
5,A635072,Beowulf,04/16/2019 09:53:00 AM,04/16/2019 09:53:00 AM,415 East Mary Street in Austin (TX),Public Assist,Normal,Dog,Neutered Male,6 years,Great Dane Mix,Black,6,Neutered,Male,Adult,Mix
6,A708452,Mumble,07/30/2015 02:37:00 PM,07/30/2015 02:37:00 PM,Austin (TX),Public Assist,Normal,Dog,Intact Male,2 years,Labrador Retriever Mix,Black/White,2,Intact,Male,Young,Mix
7,A760053,,10/11/2017 03:46:00 PM,10/11/2017 03:46:00 PM,8800 South First Street in Austin (TX),Stray,Normal,Dog,Intact Male,2 years,Chihuahua Shorthair,White/Tan,2,Intact,Male,Young,Pure
8,A707375,*Candy Cane,07/11/2015 06:19:00 PM,07/11/2015 06:19:00 PM,Galilee Court And Damita Jo Dr in Manor (TX),Stray,Normal,Dog,Intact Female,5 months,Pit Bull,Brown/White,0.416667,Intact,Female,Puppy,Pure
9,A696408,*Pearl,02/04/2015 12:58:00 PM,02/04/2015 12:58:00 PM,9705 Thaxton in Austin (TX),Stray,Normal,Dog,Intact Female,2 years,Chihuahua Shorthair,Tricolor,2,Intact,Female,Young,Pure


In [20]:
outcome_df = outcome_df.loc[outcome_df['Animal Type'] == 'Dog']
print(outcome_df.shape)
outcome_df = outcome_df.reset_index(drop=True)
outcome_df.head(10)

(76374, 12)


Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
0,A776359,Gizmo,07/18/2018 04:02:00 PM,07/18/2018 04:02:00 PM,07/12/2017,Adoption,,Dog,Neutered Male,1 year,Chihuahua Shorthair Mix,White/Brown
1,A720371,Moose,02/13/2016 05:59:00 PM,02/13/2016 05:59:00 PM,10/08/2015,Adoption,,Dog,Neutered Male,4 months,Anatol Shepherd/Labrador Retriever,Buff
2,A659412,Princess,10/05/2020 02:37:00 PM,10/05/2020 02:37:00 PM,03/24/2013,Adoption,,Dog,Spayed Female,7 years,Chihuahua Shorthair Mix,Brown
3,A814515,Quentin,05/06/2020 07:59:00 AM,05/06/2020 07:59:00 AM,03/01/2018,Adoption,Foster,Dog,Neutered Male,2 years,American Foxhound/Labrador Retriever,White/Brown
4,A840370,Tulip,08/19/2021 07:36:00 PM,08/19/2021 07:36:00 PM,08/06/2019,Adoption,,Dog,Spayed Female,2 years,Border Collie/Cardigan Welsh Corgi,Black/White
5,A818049,Fiona,06/01/2020 01:24:00 PM,06/01/2020 01:24:00 PM,06/01/2018,Return to Owner,,Dog,Intact Female,2 years,Pit Bull,White/Blue
6,A843327,*Mary,10/08/2021 01:25:00 PM,10/08/2021 01:25:00 PM,09/29/2019,Transfer,Out State,Dog,Intact Female,2 years,Chihuahua Shorthair Mix,Black/White
7,A765349,Einstein,06/08/2018 01:04:00 PM,06/08/2018 01:04:00 PM,01/18/2009,Adoption,Foster,Dog,Neutered Male,9 years,Chihuahua Shorthair Mix,Tricolor
8,A760697,Star,10/26/2017 03:22:00 PM,10/26/2017 03:22:00 PM,10/23/2007,Transfer,Partner,Dog,Intact Male,10 years,Yorkshire Terrier Mix,Brown/Black
9,A767231,Millie,02/25/2018 05:19:00 PM,02/25/2018 05:19:00 PM,02/25/2017,Return to Owner,,Dog,Spayed Female,1 year,Jack Russell Terrier/Chihuahua Shorthair,White/Tan


In [21]:
intake_sort_df = intake_df.sort_values(by=['DateTime'], ascending=True)

intake_sort_df

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type
20251,A670057,Brownie,01/01/2014 01:05:00 PM,01/01/2014 01:05:00 PM,S Lamar And Ben White in Austin (TX),Stray,Normal,Dog,Intact Male,2 years,Chow Chow Mix,Red,2,Intact,Male,Young,Mix
14972,A670059,,01/01/2014 01:31:00 PM,01/01/2014 01:31:00 PM,11402 Robert Wooding in Austin (TX),Stray,Normal,Dog,Spayed Female,1 year,West Highland,White,1,Spayed,Female,Puppy,Pure
58174,A670061,Koda,01/01/2014 01:33:00 PM,01/01/2014 01:33:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Intact Male,2 years,Chow Chow Mix,Red,2,Intact,Male,Young,Mix
29263,A670064,,01/01/2014 01:57:00 PM,01/01/2014 01:57:00 PM,Gunter St And Gonzalez in Austin (TX),Stray,Normal,Dog,Intact Female,3 months,Pit Bull/Pit Bull,Red,0.25,Intact,Female,Puppy,Mix
24785,A670065,Muneca,01/01/2014 02:11:00 PM,01/01/2014 02:11:00 PM,Applewood Dr in Austin (TX),Stray,Normal,Dog,Spayed Female,7 years,Australian Shepherd/Chow Chow,Red/White,7,Spayed,Female,Adult,Mix
22937,A668374,Princessa,01/01/2014 02:42:00 PM,01/01/2014 02:42:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Intact Female,5 months,Chihuahua Shorthair Mix,Tan,0.416667,Intact,Female,Puppy,Mix
23796,A668372,*Greta,01/01/2014 02:42:00 PM,01/01/2014 02:42:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Intact Female,11 months,Pit Bull Mix,Tan,0.916667,Intact,Female,Puppy,Mix
43342,A670068,Duke,01/01/2014 03:01:00 PM,01/01/2014 03:01:00 PM,S 1St And Stassney in Austin (TX),Stray,Normal,Dog,Neutered Male,6 years,Pit Bull Mix,Fawn,6,Neutered,Male,Adult,Mix
16631,A670070,,01/01/2014 03:24:00 PM,01/01/2014 03:24:00 PM,North Ec in Austin (TX),Stray,Injured,Dog,Intact Female,3 years,Chihuahua Shorthair Mix,Tan,3,Intact,Female,Young,Mix
10544,A670076,Apollo,01/01/2014 03:52:00 PM,01/01/2014 03:52:00 PM,Long Point Dr in Austin (TX),Stray,Normal,Dog,Intact Male,1 year,Pit Bull Mix,Blue/White,1,Intact,Male,Puppy,Mix


In [22]:
outcome_sort_df = outcome_df.sort_values(by=['DateTime'], ascending=True)

outcome_sort_df

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Date of Birth,Outcome Type,Outcome Subtype,Animal Type,Sex upon Outcome,Age upon Outcome,Breed,Color
56561,A669926,,01/01/2014 01:00:00 PM,01/01/2014 01:00:00 PM,05/30/2013,Transfer,Partner,Dog,Intact Female,7 months,Plott Hound/Black/Tan Hound,Brown/Tan
61744,A668314,Duke,01/01/2014 01:30:00 PM,01/01/2014 01:30:00 PM,12/02/2012,Transfer,Partner,Dog,Neutered Male,1 year,Labrador Retriever Mix,Chocolate/White
47473,A666770,Josh,01/01/2014 01:32:00 PM,01/01/2014 01:32:00 PM,11/07/2010,Adoption,,Dog,Neutered Male,3 years,Siberian Husky Mix,Black/Brown
41048,A668337,*Tony,01/01/2014 01:42:00 PM,01/01/2014 01:42:00 PM,12/02/2012,Euthanasia,Behavior,Dog,Intact Male,1 year,Pit Bull Mix,Brown Brindle/White
22109,A663273,Diamond,01/01/2014 01:44:00 PM,01/01/2014 01:44:00 PM,09/16/2011,Euthanasia,Behavior,Dog,Spayed Female,2 years,Catahoula Mix,Tan/Yellow Brindle
10097,A643402,Scuffy,01/01/2014 02:38:00 PM,01/01/2014 02:38:00 PM,10/01/2012,Adoption,,Dog,Spayed Female,1 year,Border Collie/Pit Bull,Black/White
24188,A668519,Joe,01/01/2014 02:46:00 PM,01/01/2014 02:46:00 PM,12/19/2012,Transfer,Partner,Dog,Neutered Male,1 year,Miniature Poodle,White/Apricot
26199,A667815,Joey,01/01/2014 03:08:00 PM,01/01/2014 03:08:00 PM,11/23/2012,Transfer,Partner,Dog,Intact Male,1 year,Chihuahua Shorthair Mix,Black/Tan
54299,A669933,Leroy,01/01/2014 03:17:00 PM,01/01/2014 03:17:00 PM,12/30/2008,Transfer,Partner,Dog,Neutered Male,5 years,Beagle,Blue Merle
62299,A670032,Patches,01/01/2014 03:17:00 PM,01/01/2014 03:17:00 PM,01/01/2008,Return to Owner,,Dog,Intact Female,6 years,Catahoula Mix,Blue Merle/White


In [23]:
intake_search_df = intake_df.loc[intake_df['Animal ID'] == 'A668314']
intake_search_df

Unnamed: 0,Animal ID,Name,DateTime,MonthYear,Found Location,Intake Type,Intake Condition,Animal Type,Sex upon Intake,Age upon Intake,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type
20910,A668314,Duke,12/29/2013 11:42:00 AM,12/29/2013 11:42:00 AM,Austin (TX),Owner Surrender,Normal,Dog,Neutered Male,1 year,Labrador Retriever Mix,Chocolate/White,1,Neutered,Male,Puppy,Mix
60404,A668314,Duke,12/02/2013 08:00:00 AM,12/02/2013 08:00:00 AM,183 & Duval in Austin (TX),Stray,Normal,Dog,Intact Male,1 year,Labrador Retriever Mix,Chocolate/White,1,Intact,Male,Puppy,Mix


In [24]:
intake_sort_df = intake_sort_df.drop(columns=['MonthYear', 'Sex upon Intake','Age upon Intake'])
intake_sort_df.head()

Unnamed: 0,Animal ID,Name,DateTime,Found Location,Intake Type,Intake Condition,Animal Type,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type
20251,A670057,Brownie,01/01/2014 01:05:00 PM,S Lamar And Ben White in Austin (TX),Stray,Normal,Dog,Chow Chow Mix,Red,2.0,Intact,Male,Young,Mix
14972,A670059,,01/01/2014 01:31:00 PM,11402 Robert Wooding in Austin (TX),Stray,Normal,Dog,West Highland,White,1.0,Spayed,Female,Puppy,Pure
58174,A670061,Koda,01/01/2014 01:33:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Chow Chow Mix,Red,2.0,Intact,Male,Young,Mix
29263,A670064,,01/01/2014 01:57:00 PM,Gunter St And Gonzalez in Austin (TX),Stray,Normal,Dog,Pit Bull/Pit Bull,Red,0.25,Intact,Female,Puppy,Mix
24785,A670065,Muneca,01/01/2014 02:11:00 PM,Applewood Dr in Austin (TX),Stray,Normal,Dog,Australian Shepherd/Chow Chow,Red/White,7.0,Spayed,Female,Adult,Mix


In [25]:
outcome_sort_df = outcome_sort_df.drop(columns=['MonthYear', 'Date of Birth','Outcome Subtype','Animal Type','Sex upon Outcome','Age upon Outcome','Breed','Color'])
outcome_sort_df.head()

Unnamed: 0,Animal ID,Name,DateTime,Outcome Type
56561,A669926,,01/01/2014 01:00:00 PM,Transfer
61744,A668314,Duke,01/01/2014 01:30:00 PM,Transfer
47473,A666770,Josh,01/01/2014 01:32:00 PM,Adoption
41048,A668337,*Tony,01/01/2014 01:42:00 PM,Euthanasia
22109,A663273,Diamond,01/01/2014 01:44:00 PM,Euthanasia


In [26]:
from datetime import datetime
intake_sort_df['in_Y_m_d'] = pd.to_datetime(intake_sort_df.DateTime)
intake_sort_df['in_Y_m_d'] = intake_sort_df['in_Y_m_d'].dt.strftime('%Y/%m/%d')
intake_sort_df['in_Y_m_d']=intake_sort_df['in_Y_m_d'].astype(str)

intake_sort_df = intake_sort_df.sort_values(['Animal ID','in_Y_m_d'], ascending=[True,True])
intake_sort_df = intake_sort_df.reset_index(drop=True)
print(intake_sort_df.shape)
intake_sort_df

(76356, 15)


Unnamed: 0,Animal ID,Name,DateTime,Found Location,Intake Type,Intake Condition,Animal Type,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type,in_Y_m_d
0,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07
1,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19
2,A006100,Scamp,12/07/2017 02:07:00 PM,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Spinone Italiano Mix,Yellow/White,10,Neutered,Male,Senior,Mix,2017/12/07
3,A047759,Oreo,04/02/2014 03:55:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Dachshund,Tricolor,10,Neutered,Male,Senior,Pure,2014/04/02
4,A134067,Bandit,11/16/2013 09:02:00 AM,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Shetland Sheepdog,Brown/White,16,Neutered,Male,Senior,Pure,2013/11/16
5,A141142,Bettie,11/16/2013 02:46:00 PM,Austin (TX),Stray,Aged,Dog,Labrador Retriever/Pit Bull,Black/White,15,Spayed,Female,Senior,Mix,2013/11/16
6,A163459,Sasha,11/14/2014 03:11:00 PM,Ih 35 And 41St St in Austin (TX),Stray,Normal,Dog,Miniature Schnauzer Mix,Black/Gray,15,Intact,Female,Senior,Mix,2014/11/14
7,A165752,Pep,09/15/2014 11:28:00 AM,Gatlin Gun Rd And Brodie in Austin (TX),Stray,Normal,Dog,Lhasa Apso Mix,Brown/White,15,Neutered,Male,Senior,Mix,2014/09/15
8,A178569,Boti,03/17/2014 09:45:00 AM,Austin (TX),Public Assist,Normal,Dog,Shetland Sheepdog Mix,White/Black,15,Neutered,Male,Senior,Mix,2014/03/17
9,A189592,Ophelia,09/18/2015 05:46:00 PM,Chesney And Slaughter in Austin (TX),Stray,Normal,Dog,Shetland Sheepdog Mix,Brown/White,18,Spayed,Female,Senior,Mix,2015/09/18


In [27]:
outcome_sort_df['out_Y_m_d'] = pd.to_datetime(outcome_sort_df.DateTime)
outcome_sort_df['out_Y_m_d'] = outcome_sort_df['out_Y_m_d'].dt.strftime('%Y/%m/%d')
outcome_sort_df['out_Y_m_d']=outcome_sort_df['out_Y_m_d'].astype(str)

outcome_sort_df = outcome_sort_df.sort_values(['Animal ID','out_Y_m_d'], ascending=[True,True])
outcome_sort_df = outcome_sort_df.reset_index(drop=True)
print(outcome_sort_df.shape)
outcome_sort_df

(76374, 5)


Unnamed: 0,Animal ID,Name,DateTime,Outcome Type,out_Y_m_d
0,A006100,Scamp,03/08/2014 05:10:00 PM,Return to Owner,2014/03/08
1,A006100,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20
2,A006100,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07
3,A047759,Oreo,04/07/2014 03:12:00 PM,Transfer,2014/04/07
4,A134067,Bandit,11/16/2013 11:54:00 AM,Return to Owner,2013/11/16
5,A141142,Bettie,11/17/2013 11:40:00 AM,Return to Owner,2013/11/17
6,A163459,Sasha,11/14/2014 07:28:00 PM,Return to Owner,2014/11/14
7,A165752,Pep,09/15/2014 04:35:00 PM,Return to Owner,2014/09/15
8,A178569,Boti,03/23/2014 03:57:00 PM,Return to Owner,2014/03/23
9,A189592,Ophelia,09/18/2015 07:04:00 PM,Return to Owner,2015/09/18


In [28]:
print('row 90')
print('----------------------------------------------')
print(intake_sort_df.loc[[90]])
print(outcome_sort_df.loc[[90]])
print('row 91')
print('----------------------------------------------')
print(intake_sort_df.loc[[91]])
print(outcome_sort_df.loc[[91]])
print('row 92')
print('----------------------------------------------')
print(intake_sort_df.loc[[92]])
print(outcome_sort_df.loc[[92]])
print('row 93')
print('----------------------------------------------')
print(intake_sort_df.loc[[93]])
print(outcome_sort_df.loc[[93]])
print('row 94')
print('----------------------------------------------')
print(intake_sort_df.loc[[94]])
print(outcome_sort_df.loc[[94]])
print('row 95')
print('----------------------------------------------')
print(intake_sort_df.loc[[95]])
print(outcome_sort_df.loc[[95]])
print('row 96')
print('----------------------------------------------')
print(intake_sort_df.loc[[96]])
print(outcome_sort_df.loc[[96]])
print('row 97')
print('----------------------------------------------')
print(intake_sort_df.loc[[97]])
print(outcome_sort_df.loc[[97]])
print('row 98')
print('----------------------------------------------')
print(intake_sort_df.loc[[98]])
print(outcome_sort_df.loc[[98]])
print('row 99')
print('----------------------------------------------')
print(intake_sort_df.loc[[99]])
print(outcome_sort_df.loc[[99]])

row 90
----------------------------------------------
   Animal ID   Name                DateTime                  Found Location  \
90   A333780  Romeo  05/18/2016 12:55:00 PM  Meadow Creek Dr in Austin (TX)   

   Intake Type Intake Condition Animal Type                    Breed  \
90       Stray           Normal         Dog  Chihuahua Shorthair Mix   

       Color Age_in_Years Intactness   Sex     Age Breed_Type    in_Y_m_d  
90  Tricolor           12   Neutered  Male  Senior        Mix  2016/05/18  
   Animal ID   Name                DateTime     Outcome Type   out_Y_m_d
90   A333780  Romeo  05/18/2016 03:37:00 PM  Return to Owner  2016/05/18
row 91
----------------------------------------------
   Animal ID    Name                DateTime                   Found Location  \
91   A333781  Junior  05/16/2016 02:09:00 PM  5601 Manchaca Rd in Austin (TX)   

   Intake Type Intake Condition Animal Type                    Breed  \
91       Stray           Normal         Dog  Chihuahua 

In [None]:
intake_sort_df.to_csv('Resources/intake_sorted.csv')
outcome_sort_df.to_csv('Resources/outcome_sorted.csv')

In [29]:
data_df=pd.merge(intake_sort_df, outcome_sort_df, on="Animal ID")
#data_df = data_df.sort_values(['Animal ID'], ascending=[True])
data_df

Unnamed: 0,Animal ID,Name_x,DateTime_x,Found Location,Intake Type,Intake Condition,Animal Type,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type,in_Y_m_d,Name_y,DateTime_y,Outcome Type,out_Y_m_d
0,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,03/08/2014 05:10:00 PM,Return to Owner,2014/03/08
1,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20
2,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07
3,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,03/08/2014 05:10:00 PM,Return to Owner,2014/03/08
4,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20
5,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07
6,A006100,Scamp,12/07/2017 02:07:00 PM,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Spinone Italiano Mix,Yellow/White,10,Neutered,Male,Senior,Mix,2017/12/07,Scamp,03/08/2014 05:10:00 PM,Return to Owner,2014/03/08
7,A006100,Scamp,12/07/2017 02:07:00 PM,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Spinone Italiano Mix,Yellow/White,10,Neutered,Male,Senior,Mix,2017/12/07,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20
8,A006100,Scamp,12/07/2017 02:07:00 PM,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Spinone Italiano Mix,Yellow/White,10,Neutered,Male,Senior,Mix,2017/12/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07
9,A047759,Oreo,04/02/2014 03:55:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Dachshund,Tricolor,10,Neutered,Male,Senior,Pure,2014/04/02,Oreo,04/07/2014 03:12:00 PM,Transfer,2014/04/07


In [30]:
print(data_df.shape)

(110681, 19)


In [31]:
data_df_filtered = data_df[data_df['out_Y_m_d'] >= data_df["in_Y_m_d"]]
print(data_df_filtered.shape)
data_df_filtered

(93382, 19)


Unnamed: 0,Animal ID,Name_x,DateTime_x,Found Location,Intake Type,Intake Condition,Animal Type,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type,in_Y_m_d,Name_y,DateTime_y,Outcome Type,out_Y_m_d
0,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,03/08/2014 05:10:00 PM,Return to Owner,2014/03/08
1,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20
2,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07
4,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20
5,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07
8,A006100,Scamp,12/07/2017 02:07:00 PM,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Spinone Italiano Mix,Yellow/White,10,Neutered,Male,Senior,Mix,2017/12/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07
9,A047759,Oreo,04/02/2014 03:55:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Dachshund,Tricolor,10,Neutered,Male,Senior,Pure,2014/04/02,Oreo,04/07/2014 03:12:00 PM,Transfer,2014/04/07
10,A134067,Bandit,11/16/2013 09:02:00 AM,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Shetland Sheepdog,Brown/White,16,Neutered,Male,Senior,Pure,2013/11/16,Bandit,11/16/2013 11:54:00 AM,Return to Owner,2013/11/16
11,A141142,Bettie,11/16/2013 02:46:00 PM,Austin (TX),Stray,Aged,Dog,Labrador Retriever/Pit Bull,Black/White,15,Spayed,Female,Senior,Mix,2013/11/16,Bettie,11/17/2013 11:40:00 AM,Return to Owner,2013/11/17
12,A163459,Sasha,11/14/2014 03:11:00 PM,Ih 35 And 41St St in Austin (TX),Stray,Normal,Dog,Miniature Schnauzer Mix,Black/Gray,15,Intact,Female,Senior,Mix,2014/11/14,Sasha,11/14/2014 07:28:00 PM,Return to Owner,2014/11/14


In [32]:
data_df_filtered = data_df_filtered.reset_index(drop=True)
data_df_test=data_df_filtered.head(10)
print(data_df_test.shape)
data_df_test

(10, 19)


Unnamed: 0,Animal ID,Name_x,DateTime_x,Found Location,Intake Type,Intake Condition,Animal Type,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type,in_Y_m_d,Name_y,DateTime_y,Outcome Type,out_Y_m_d
0,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,03/08/2014 05:10:00 PM,Return to Owner,2014/03/08
1,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20
2,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07
3,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20
4,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07
5,A006100,Scamp,12/07/2017 02:07:00 PM,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Spinone Italiano Mix,Yellow/White,10,Neutered,Male,Senior,Mix,2017/12/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07
6,A047759,Oreo,04/02/2014 03:55:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Dachshund,Tricolor,10,Neutered,Male,Senior,Pure,2014/04/02,Oreo,04/07/2014 03:12:00 PM,Transfer,2014/04/07
7,A134067,Bandit,11/16/2013 09:02:00 AM,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Shetland Sheepdog,Brown/White,16,Neutered,Male,Senior,Pure,2013/11/16,Bandit,11/16/2013 11:54:00 AM,Return to Owner,2013/11/16
8,A141142,Bettie,11/16/2013 02:46:00 PM,Austin (TX),Stray,Aged,Dog,Labrador Retriever/Pit Bull,Black/White,15,Spayed,Female,Senior,Mix,2013/11/16,Bettie,11/17/2013 11:40:00 AM,Return to Owner,2013/11/17
9,A163459,Sasha,11/14/2014 03:11:00 PM,Ih 35 And 41St St in Austin (TX),Stray,Normal,Dog,Miniature Schnauzer Mix,Black/Gray,15,Intact,Female,Senior,Mix,2014/11/14,Sasha,11/14/2014 07:28:00 PM,Return to Owner,2014/11/14


In [33]:
data_df_test["Drop"] = ""
for row in (data_df_test.index):
    prevrow = row - 1
    if(((prevrow) >= 0) and (row <= 7)):
        if((data_df_test["Animal ID"].values[row] ==  data_df_test["Animal ID"].values[prevrow]) and (data_df_test["in_Y_m_d"].values[row] == data_df_test["in_Y_m_d"].values[prevrow]) and (data_df_test["out_Y_m_d"]).values[row] > data_df_test["out_Y_m_d"].values[prevrow]):
            data_df_test["Drop"].values[row] = "Yes"
        else:
            data_df_test["Drop"].values[row] = "No"
    else:
        data_df_test["Drop"].values[row] = "No"
print(data_df_test.shape)
data_df_test

(10, 20)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Animal ID,Name_x,DateTime_x,Found Location,Intake Type,Intake Condition,Animal Type,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type,in_Y_m_d,Name_y,DateTime_y,Outcome Type,out_Y_m_d,Drop
0,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,03/08/2014 05:10:00 PM,Return to Owner,2014/03/08,No
1,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20,Yes
2,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07,Yes
3,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20,No
4,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07,Yes
5,A006100,Scamp,12/07/2017 02:07:00 PM,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Spinone Italiano Mix,Yellow/White,10,Neutered,Male,Senior,Mix,2017/12/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07,No
6,A047759,Oreo,04/02/2014 03:55:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Dachshund,Tricolor,10,Neutered,Male,Senior,Pure,2014/04/02,Oreo,04/07/2014 03:12:00 PM,Transfer,2014/04/07,No
7,A134067,Bandit,11/16/2013 09:02:00 AM,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Shetland Sheepdog,Brown/White,16,Neutered,Male,Senior,Pure,2013/11/16,Bandit,11/16/2013 11:54:00 AM,Return to Owner,2013/11/16,No
8,A141142,Bettie,11/16/2013 02:46:00 PM,Austin (TX),Stray,Aged,Dog,Labrador Retriever/Pit Bull,Black/White,15,Spayed,Female,Senior,Mix,2013/11/16,Bettie,11/17/2013 11:40:00 AM,Return to Owner,2013/11/17,No
9,A163459,Sasha,11/14/2014 03:11:00 PM,Ih 35 And 41St St in Austin (TX),Stray,Normal,Dog,Miniature Schnauzer Mix,Black/Gray,15,Intact,Female,Senior,Mix,2014/11/14,Sasha,11/14/2014 07:28:00 PM,Return to Owner,2014/11/14,No


In [34]:
merged_df = data_df_test[data_df_test['Drop'] == "No"]
merged_df = merged_df.reset_index(drop=True)
print(merged_df.shape)
merged_df

(7, 20)


Unnamed: 0,Animal ID,Name_x,DateTime_x,Found Location,Intake Type,Intake Condition,Animal Type,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type,in_Y_m_d,Name_y,DateTime_y,Outcome Type,out_Y_m_d,Drop
0,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,03/08/2014 05:10:00 PM,Return to Owner,2014/03/08,No
1,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20,No
2,A006100,Scamp,12/07/2017 02:07:00 PM,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Spinone Italiano Mix,Yellow/White,10,Neutered,Male,Senior,Mix,2017/12/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07,No
3,A047759,Oreo,04/02/2014 03:55:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Dachshund,Tricolor,10,Neutered,Male,Senior,Pure,2014/04/02,Oreo,04/07/2014 03:12:00 PM,Transfer,2014/04/07,No
4,A134067,Bandit,11/16/2013 09:02:00 AM,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Shetland Sheepdog,Brown/White,16,Neutered,Male,Senior,Pure,2013/11/16,Bandit,11/16/2013 11:54:00 AM,Return to Owner,2013/11/16,No
5,A141142,Bettie,11/16/2013 02:46:00 PM,Austin (TX),Stray,Aged,Dog,Labrador Retriever/Pit Bull,Black/White,15,Spayed,Female,Senior,Mix,2013/11/16,Bettie,11/17/2013 11:40:00 AM,Return to Owner,2013/11/17,No
6,A163459,Sasha,11/14/2014 03:11:00 PM,Ih 35 And 41St St in Austin (TX),Stray,Normal,Dog,Miniature Schnauzer Mix,Black/Gray,15,Intact,Female,Senior,Mix,2014/11/14,Sasha,11/14/2014 07:28:00 PM,Return to Owner,2014/11/14,No


In [42]:
data_df_filt = data_df_filtered.reset_index(drop=True)
data_df_filt["Drop"] = ""
for row in (data_df_filt.index):
    prevrow = row - 1
    if(((prevrow) >= 0) and (row <= 110681)):
        if((data_df_filt["Animal ID"].values[row] ==  data_df_filt["Animal ID"].values[prevrow]) and (data_df_filt["in_Y_m_d"].values[row] == data_df_filt["in_Y_m_d"].values[prevrow]) and (data_df_filt["out_Y_m_d"]).values[row] > data_df_filt["out_Y_m_d"].values[prevrow]):
            data_df_filt["Drop"].values[row] = "Yes"
        else:
            data_df_filt["Drop"].values[row] = "No"
    else:
        data_df_filt["Drop"].values[row] = "No"
print(data_df_filt.shape)
data_df_filt

(93382, 20)


Unnamed: 0,Animal ID,Name_x,DateTime_x,Found Location,Intake Type,Intake Condition,Animal Type,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type,in_Y_m_d,Name_y,DateTime_y,Outcome Type,out_Y_m_d,Drop
0,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,03/08/2014 05:10:00 PM,Return to Owner,2014/03/08,No
1,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20,Yes
2,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07,Yes
3,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20,No
4,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07,Yes
5,A006100,Scamp,12/07/2017 02:07:00 PM,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Spinone Italiano Mix,Yellow/White,10,Neutered,Male,Senior,Mix,2017/12/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07,No
6,A047759,Oreo,04/02/2014 03:55:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Dachshund,Tricolor,10,Neutered,Male,Senior,Pure,2014/04/02,Oreo,04/07/2014 03:12:00 PM,Transfer,2014/04/07,No
7,A134067,Bandit,11/16/2013 09:02:00 AM,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Shetland Sheepdog,Brown/White,16,Neutered,Male,Senior,Pure,2013/11/16,Bandit,11/16/2013 11:54:00 AM,Return to Owner,2013/11/16,No
8,A141142,Bettie,11/16/2013 02:46:00 PM,Austin (TX),Stray,Aged,Dog,Labrador Retriever/Pit Bull,Black/White,15,Spayed,Female,Senior,Mix,2013/11/16,Bettie,11/17/2013 11:40:00 AM,Return to Owner,2013/11/17,No
9,A163459,Sasha,11/14/2014 03:11:00 PM,Ih 35 And 41St St in Austin (TX),Stray,Normal,Dog,Miniature Schnauzer Mix,Black/Gray,15,Intact,Female,Senior,Mix,2014/11/14,Sasha,11/14/2014 07:28:00 PM,Return to Owner,2014/11/14,No


In [43]:
merged_df = data_df_filt[data_df_filt['Drop'] == "No"]
merged_df = merged_df.reset_index(drop=True)
print(merged_df.shape)
merged_df

(75991, 20)


Unnamed: 0,Animal ID,Name_x,DateTime_x,Found Location,Intake Type,Intake Condition,Animal Type,Breed,Color,Age_in_Years,Intactness,Sex,Age,Breed_Type,in_Y_m_d,Name_y,DateTime_y,Outcome Type,out_Y_m_d,Drop
0,A006100,Scamp,03/07/2014 02:26:00 PM,8700 Research in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,6,Neutered,Male,Adult,Mix,2014/03/07,Scamp,03/08/2014 05:10:00 PM,Return to Owner,2014/03/08,No
1,A006100,Scamp,12/19/2014 10:21:00 AM,8700 Research Blvd in Austin (TX),Public Assist,Normal,Dog,Spinone Italiano Mix,Yellow/White,7,Neutered,Male,Adult,Mix,2014/12/19,Scamp,12/20/2014 04:35:00 PM,Return to Owner,2014/12/20,No
2,A006100,Scamp,12/07/2017 02:07:00 PM,Colony Creek And Hunters Trace in Austin (TX),Stray,Normal,Dog,Spinone Italiano Mix,Yellow/White,10,Neutered,Male,Senior,Mix,2017/12/07,Scamp,12/07/2017 12:00:00 AM,Return to Owner,2017/12/07,No
3,A047759,Oreo,04/02/2014 03:55:00 PM,Austin (TX),Owner Surrender,Normal,Dog,Dachshund,Tricolor,10,Neutered,Male,Senior,Pure,2014/04/02,Oreo,04/07/2014 03:12:00 PM,Transfer,2014/04/07,No
4,A134067,Bandit,11/16/2013 09:02:00 AM,12034 Research Blvd in Austin (TX),Public Assist,Injured,Dog,Shetland Sheepdog,Brown/White,16,Neutered,Male,Senior,Pure,2013/11/16,Bandit,11/16/2013 11:54:00 AM,Return to Owner,2013/11/16,No
5,A141142,Bettie,11/16/2013 02:46:00 PM,Austin (TX),Stray,Aged,Dog,Labrador Retriever/Pit Bull,Black/White,15,Spayed,Female,Senior,Mix,2013/11/16,Bettie,11/17/2013 11:40:00 AM,Return to Owner,2013/11/17,No
6,A163459,Sasha,11/14/2014 03:11:00 PM,Ih 35 And 41St St in Austin (TX),Stray,Normal,Dog,Miniature Schnauzer Mix,Black/Gray,15,Intact,Female,Senior,Mix,2014/11/14,Sasha,11/14/2014 07:28:00 PM,Return to Owner,2014/11/14,No
7,A165752,Pep,09/15/2014 11:28:00 AM,Gatlin Gun Rd And Brodie in Austin (TX),Stray,Normal,Dog,Lhasa Apso Mix,Brown/White,15,Neutered,Male,Senior,Mix,2014/09/15,Pep,09/15/2014 04:35:00 PM,Return to Owner,2014/09/15,No
8,A178569,Boti,03/17/2014 09:45:00 AM,Austin (TX),Public Assist,Normal,Dog,Shetland Sheepdog Mix,White/Black,15,Neutered,Male,Senior,Mix,2014/03/17,Boti,03/23/2014 03:57:00 PM,Return to Owner,2014/03/23,No
9,A189592,Ophelia,09/18/2015 05:46:00 PM,Chesney And Slaughter in Austin (TX),Stray,Normal,Dog,Shetland Sheepdog Mix,Brown/White,18,Spayed,Female,Senior,Mix,2015/09/18,Ophelia,09/18/2015 07:04:00 PM,Return to Owner,2015/09/18,No


In [44]:
merged_df.to_csv('Resources/merged.csv')