In [44]:
import pandas as pd

# Read CSV files into DataFrames
intakes_df = pd.read_csv('Austin_Animal_Center_Intakes.csv')
outcomes_df = pd.read_csv('Austin_Animal_Center_Outcomes.csv')

# Check unique 'Animal ID' values before filtering duplicates
print("Unique Animal IDs in Intakes before filtering:", intakes_df['Animal ID'].nunique())
print("Unique Animal IDs in Outcomes before filtering:", outcomes_df['Animal ID'].nunique())

# Drop duplicates based on 'Animal ID' in both DataFrames
intakes_df = intakes_df.drop_duplicates(subset='Animal ID')
outcomes_df = outcomes_df.drop_duplicates(subset='Animal ID')

# Check unique 'Animal ID' values after filtering duplicates
print("Unique Animal IDs in Intakes after filtering:", intakes_df['Animal ID'].nunique())
print("Unique Animal IDs in Outcomes after filtering:", outcomes_df['Animal ID'].nunique())

# Set 'Animal ID' as the index for both DataFrames
intakes_df.set_index('Animal ID', inplace=True)
outcomes_df.set_index('Animal ID', inplace=True)

# Join the intake data with the outcome data based on the index ('Animal ID')
result_df = intakes_df.join(outcomes_df, how='inner', lsuffix='_intakes', rsuffix='_outcomes')

# Check unique 'Animal ID' values after the join
print("Unique Animal IDs in Result:", result_df.index.nunique())


Unique Animal IDs in Intakes before filtering: 58552
Unique Animal IDs in Outcomes before filtering: 29044
Unique Animal IDs in Intakes after filtering: 58552
Unique Animal IDs in Outcomes after filtering: 29044
Unique Animal IDs in Result: 28745


In [45]:
intakes_df.shape

(58552, 11)

In [46]:
outcomes_df.shape

(29044, 11)

In [47]:
result_df.shape

(28745, 22)

In [48]:
result_df.duplicated().sum()

56

In [49]:
result_df.head()

Unnamed: 0_level_0,Name_intakes,DateTime_intakes,MonthYear_intakes,Found Location,Intake Type,Intake Condition,Animal Type_intakes,Sex upon Intake,Age upon Intake,Breed_intakes,...,DateTime_outcomes,MonthYear_outcomes,Date of Birth,Outcome Type,Outcome Subtype,Animal Type_outcomes,Sex upon Outcome,Age upon Outcome,Breed_outcomes,Color_outcomes
Animal ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A664887,*Gia,10/10/2013 01:48:00 PM,October 2013,1901 Onion Creek Pkwy in Austin (TX),Stray,Normal,Cat,Intact Female,1 month,Domestic Shorthair Mix,...,11/10/2013 04:56:00 PM,Nov 2013,08/10/2013,Adoption,,Cat,Spayed Female,3 months,Domestic Shorthair Mix,Black
A665496,Mikey,10/18/2013 06:07:00 PM,October 2013,12001 Metric Blvd in Austin (TX),Stray,Normal,Cat,Neutered Male,3 years,Bengal,...,10/22/2013 05:54:00 PM,Oct 2013,04/18/2010,Adoption,,Cat,Neutered Male,3 years,Bengal,Brown Tiger
A664936,*Jester,10/11/2013 11:20:00 AM,October 2013,501 U.S. 183 in Austin (TX),Stray,Normal,Cat,Intact Male,1 month,Domestic Medium Hair Mix,...,12/18/2013 06:18:00 PM,Dec 2013,08/18/2013,Adoption,,Cat,Neutered Male,4 months,Domestic Medium Hair Mix,Brown Tabby
A665398,Haven,10/17/2013 12:26:00 PM,October 2013,Austin (TX),Owner Surrender,Normal,Cat,Intact Female,1 month,Domestic Shorthair Mix,...,11/10/2013 04:45:00 PM,Nov 2013,08/19/2013,Adoption,,Cat,Spayed Female,2 months,Domestic Shorthair Mix,White/Blue
A665426,Silka,10/17/2013 04:53:00 PM,October 2013,8224 Research Blvd #244 in Austin (TX),Stray,Normal,Cat,Intact Male,1 month,Domestic Shorthair Mix,...,11/16/2013 05:09:00 PM,Nov 2013,08/26/2013,Adoption,,Cat,Neutered Male,2 months,Domestic Shorthair Mix,Black


In [50]:
# Reset the index to include 'Animal ID' as a column
result_df.reset_index(inplace=True)
result_df.to_csv("../dataset/Texas_Intake_and_Outcome.csv", index = False)

In [51]:
result_df.duplicated().sum()

0

In [43]:
# Read CSV 
reading = pd.read_csv("../dataset/Texas_Intake_and_Outcome.csv")
reading.head()

Unnamed: 0,Name_intakes,DateTime_intakes,MonthYear_intakes,Found Location,Intake Type,Intake Condition,Animal Type_intakes,Sex upon Intake,Age upon Intake,Breed_intakes,...,DateTime_outcomes,MonthYear_outcomes,Date of Birth,Outcome Type,Outcome Subtype,Animal Type_outcomes,Sex upon Outcome,Age upon Outcome,Breed_outcomes,Color_outcomes
0,*Gia,10/10/2013 01:48:00 PM,October 2013,1901 Onion Creek Pkwy in Austin (TX),Stray,Normal,Cat,Intact Female,1 month,Domestic Shorthair Mix,...,11/10/2013 04:56:00 PM,Nov 2013,08/10/2013,Adoption,,Cat,Spayed Female,3 months,Domestic Shorthair Mix,Black
1,Mikey,10/18/2013 06:07:00 PM,October 2013,12001 Metric Blvd in Austin (TX),Stray,Normal,Cat,Neutered Male,3 years,Bengal,...,10/22/2013 05:54:00 PM,Oct 2013,04/18/2010,Adoption,,Cat,Neutered Male,3 years,Bengal,Brown Tiger
2,*Jester,10/11/2013 11:20:00 AM,October 2013,501 U.S. 183 in Austin (TX),Stray,Normal,Cat,Intact Male,1 month,Domestic Medium Hair Mix,...,12/18/2013 06:18:00 PM,Dec 2013,08/18/2013,Adoption,,Cat,Neutered Male,4 months,Domestic Medium Hair Mix,Brown Tabby
3,Haven,10/17/2013 12:26:00 PM,October 2013,Austin (TX),Owner Surrender,Normal,Cat,Intact Female,1 month,Domestic Shorthair Mix,...,11/10/2013 04:45:00 PM,Nov 2013,08/19/2013,Adoption,,Cat,Spayed Female,2 months,Domestic Shorthair Mix,White/Blue
4,Silka,10/17/2013 04:53:00 PM,October 2013,8224 Research Blvd #244 in Austin (TX),Stray,Normal,Cat,Intact Male,1 month,Domestic Shorthair Mix,...,11/16/2013 05:09:00 PM,Nov 2013,08/26/2013,Adoption,,Cat,Neutered Male,2 months,Domestic Shorthair Mix,Black
