In [None]:
import requests
import pandas as pd
import time

"""Petfinder API to pull listings for adoptable pets with pagination and retry logic."""



# 🔐 Step 1: Authenticate and obtain API access token
# CLIENT_ID, CLIENT_SECRET and TOKEN_URL are expected to be defined before this
# cell runs (they are not set anywhere in this cell).
auth_data = {
    "grant_type": "client_credentials",
    "client_id": CLIENT_ID,
    "client_secret": CLIENT_SECRET
}
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.post(TOKEN_URL, data=auth_data, timeout=30)

# An error response is not guaranteed to carry a JSON body; fall back to the
# raw text so the real failure is reported instead of a JSON decode error.
try:
    token_payload = response.json()
except ValueError:
    token_payload = None

access_token = token_payload.get("access_token") if isinstance(token_payload, dict) else None

if not access_token:
    failure_detail = token_payload if token_payload is not None else response.text
    # Raise instead of calling exit(): the failure then shows up as an ordinary
    # cell error and the rest of the notebook state stays available.
    raise RuntimeError(
        f"❌ Failed to obtain access token (HTTP {response.status_code}): {failure_detail}"
    )

# 🏗 Step 2: Define headers for authentication
headers = {"Authorization": f"Bearer {access_token}"}

# 🐶 Step 3: Fetch adoptable pets using pagination with retry logic
PET_URL = "https://api.petfinder.com/v2/animals"
PET_MAX_ATTEMPTS = 3        # tries per page before giving up
PET_BASE_DELAY_SECONDS = 5  # first back-off delay; doubled on every further retry
PET_REQUEST_TIMEOUT = 30    # seconds; without it a stalled connection hangs the cell


def fetch_pet_page(page_number):
    """Fetch one page of dog listings, retrying failed requests with back-off.

    Parameters
    ----------
    page_number : int
        1-based page index passed to the API as the ``page`` query parameter.

    Returns
    -------
    list or None
        The ``"animals"`` list of the JSON response on HTTP 200 (possibly
        empty), or ``None`` when every attempt failed.
    """
    params = {"type": "dog", "location": "75001", "limit": 100, "page": page_number}

    for attempt in range(1, PET_MAX_ATTEMPTS + 1):
        retry_after = None
        try:
            response = requests.get(
                PET_URL, params=params, headers=headers, timeout=PET_REQUEST_TIMEOUT
            )
        except requests.RequestException as error:
            # Connection errors and timeouts are retried like a bad status code
            # instead of aborting the whole pagination loop.
            failure = type(error).__name__
        else:
            if response.status_code == 200:
                return response.json()["animals"]
            failure = response.status_code
            retry_after = response.headers.get("Retry-After")

        print(f"❌ Failed to fetch pets on page {page_number}: {failure} - attempt {attempt}")

        if attempt < PET_MAX_ATTEMPTS:  # no point sleeping after the final attempt
            delay = PET_BASE_DELAY_SECONDS * 2 ** (attempt - 1)
            # If the server states how long to wait (typical for 429 responses),
            # wait at least that long. Only the delta-seconds form is handled.
            if retry_after and retry_after.isdigit():
                delay = max(delay, int(retry_after))
            time.sleep(delay)

    return None


all_pets = []  # Store all pets data

page = 1  # Start from first page
while True:
    pet_data = fetch_pet_page(page)

    if pet_data is None:
        print(f"❌ Giving up on page {page}.")
        break

    if not pet_data:  # If no more pets are returned, stop pagination
        break

    all_pets.extend(pet_data)  # Append current batch of pets
    page += 1  # Move to next page

# 📊 Convert all pet data to a DataFrame
# With nothing fetched, json_normalize returns a frame without any of the
# expected columns and the selection below would fail with an opaque KeyError.
# Fail early with a message that names the real problem.
if not all_pets:
    raise RuntimeError(
        "No pets were fetched, so there is nothing to tabulate; "
        "check the output of the fetch step for request failures."
    )
df_pets = pd.json_normalize(all_pets)

# 🎯 Select relevant columns
# Single source of truth: flattened API field -> report column name, in output order.
PET_COLUMNS = {
    "id": "Pet ID",
    "name": "Name",
    "breeds.primary": "Breed",
    "age": "Age",
    "organization_id": "Shelter ID",
    "url": "Adoption Link",
    "contact.address.city": "City",
    "contact.address.state": "State",
}
df_pet = df_pets[list(PET_COLUMNS)].rename(columns=PET_COLUMNS)

# 🏠 Step 4: Fetch shelter details dynamically
shelter_ids = df_pet["Shelter ID"].dropna().unique()

# Flattened API field -> report column name, in output order.
SHELTER_COLUMNS = {
    "id": "Shelter ID",
    "name": "Shelter Name",
    "address.city": "City",
    "address.state": "State",
    "phone": "Phone",
    "url": "Website",
}
SHELTER_MAX_ATTEMPTS = 3
# Statuses worth retrying: rate limiting and transient server errors.
SHELTER_RETRY_STATUSES = {429, 500, 502, 503, 504}

shelter_records = []     # raw organization dicts, normalized once after the loop
failed_shelter_ids = []  # lookups that never succeeded, reported below

for shelter_id in shelter_ids:
    ORG_URL = f"https://api.petfinder.com/v2/organizations/{shelter_id}"
    shelter_data = None

    for attempt in range(1, SHELTER_MAX_ATTEMPTS + 1):
        try:
            response = requests.get(ORG_URL, headers=headers, timeout=30)
        except requests.RequestException:
            retryable = True  # connection error or timeout
        else:
            if response.status_code == 200:
                shelter_data = response.json()["organization"]
                break
            retryable = response.status_code in SHELTER_RETRY_STATUSES

        if not retryable or attempt == SHELTER_MAX_ATTEMPTS:
            break
        time.sleep(5 * attempt)  # 5s, then 10s

    if shelter_data is None:
        failed_shelter_ids.append(shelter_id)
    else:
        shelter_records.append(shelter_data)

if failed_shelter_ids:
    # Pets of these shelters will get empty shelter columns in a left merge,
    # so make the gap visible instead of dropping it silently.
    print(f"⚠️ Could not fetch {len(failed_shelter_ids)} shelter(s): {failed_shelter_ids}")

# 🎯 Select relevant columns for shelters
# Normalizing the whole list once avoids re-copying the frame on every
# iteration, which is what concatenating inside the loop did.
if shelter_records:
    # reindex tolerates a field that is absent from every record (filled with NaN).
    df_shelters = pd.json_normalize(shelter_records).reindex(columns=list(SHELTER_COLUMNS))
else:
    df_shelters = pd.DataFrame(columns=list(SHELTER_COLUMNS))
df_shelters = df_shelters.rename(columns=SHELTER_COLUMNS)

# 🔗 Merge Pet & Shelter DataFrames
# Left join keeps every pet row. "City" and "State" exist on both sides, so
# pandas' default suffixes apply: _x for the pet columns, _y for the shelter ones.
df_combined = pd.merge(df_pet, df_shelters, how="left", on="Shelter ID")

# 💾 Step 5: Save DataFrames to Excel and CSV
# (file name, frame) pairs, written in this order.
# NOTE(review): "original_adoptable_pets.csv" receives the same frame as
# "adoptable_pets.csv" — confirm whether the raw df_pets was intended.
csv_exports = (
    ("adoptable_pets.csv", df_pet),
    ("shelter_details.csv", df_shelters),
    ("adoptable_pets_shelters.csv", df_combined),
    ("original_adoptable_pets.csv", df_pet),
)
for csv_name, frame in csv_exports:
    frame.to_csv(csv_name, index=False)

df_combined.to_excel(
    "adoptable_pets_shelters.xlsx",
    index=False,
    sheet_name="Adoption Data",
)

# Summary of what was written, followed by a preview of the merged data.
for status_line in (
    "✅ Data successfully saved! 🎉",
    "📂 CSV files: 'adoptable_pets.csv', 'shelter_details.csv', 'adoptable_pets_shelters.csv'",
    "📂 Excel file: 'adoptable_pets_shelters.xlsx'",
):
    print(status_line)
print(df_combined.head())

❌ Failed to fetch pets on page 1: 429 - attempt 1
❌ Failed to fetch pets on page 1: 429 - attempt 2


KeyboardInterrupt: 

In [23]:
# (rows, columns) of the merged pet/shelter frame.
df_combined.shape

(8006, 13)

In [24]:
# Column labels after the merge; the _x / _y suffixes mark the City/State
# columns that exist on both the pet and the shelter frame.
df_combined.columns

Index(['Pet ID', 'Name', 'Breed', 'Age', 'Shelter ID', 'Adoption Link',
       'City_x', 'State_x', 'Shelter Name', 'City_y', 'State_y', 'Phone',
       'Website'],
      dtype='object')

In [16]:
# Plain-text preview of the first rows of the merged frame.
print(df_combined.head())

     Pet ID           Name         Breed    Age Shelter ID  \
0  76440074    Little Pepe     Chihuahua  Adult     TX2896   
1  76440023    Little Pepe     Chihuahua  Adult     TX2896   
2  76440021         Weasel     Dachshund  Young     TX1223   
3  76440020        Petunia         Corgi  Young     TX1223   
4  76438778  Tori PKA Tory  Aussiedoodle  Adult     TX1568   

                                       Adoption Link            City_x  \
0  https://www.petfinder.com/dog/little-pepe-7644...              Krum   
1  https://www.petfinder.com/dog/little-pepe-7644...  Highland Village   
2  https://www.petfinder.com/dog/weasel-76440021/...         Arlington   
3  https://www.petfinder.com/dog/petunia-76440020...         Arlington   
4  https://www.petfinder.com/dog/tori-pka-tory-76...            Dallas   

  State_x              Shelter Name     City_y State_y           Phone  \
0      TX         Ruff Road Revival       Krum      TX            None   
1      TX         Ruff Road Reviva

In [17]:
# Distinct cities on the pet listings. The renamed "City" column lives on
# df_pet; df_pets is the raw json_normalize output, where the same field is
# called "contact.address.city", so df_pets['City'] fails on a fresh kernel.
df_pet['City'].unique()

array(['Krum', 'Highland Village', 'Arlington', 'Dallas', 'Carrollton',
       'Seven Points', 'Richardson', 'Euless', 'McKinney', 'Fort Worth',
       'Sanger', 'Farmers Branch', 'Cedar Hill', 'Granbury', 'Lewisville',
       'Quinlan', 'Colleyville', 'Alvarado', 'Plano', 'Gainesville',
       'Little Elm', 'Pilot Point', 'Irving', 'Corsicana', 'Denison',
       'Weatherford', 'Rockwall', 'Itasca', 'Stephenville', 'Grapevine',
       'Forney', 'Waco', 'Ardmore', 'North Richland Hills', 'Frisco',
       'Mesquite', 'Justin', 'Graham', 'Keller', 'Grand Prairie',
       'Sherman', 'Flower Mound', 'Tyler', 'Greenville', 'Henrietta',
       'Farmersville', 'Ennis', 'Southlake', 'Rowlett', 'Point',
       'Benbrook', 'Madill', 'Pottsboro', 'Haslet', 'Tishomingo', 'Allen',
       'Mineola', 'Waxahachie', 'Garland', 'Sunnyvale', 'Lavon', 'Mexia',
       'Joshua', 'Saginaw', 'Commerce', 'Van Alstyne', 'Paris', 'EMORY',
       'White Settlement', 'Royse City', 'Springtown', 'Whitewright',
     

In [18]:
# Distinct cities of the shelters' own addresses.
df_shelters['City'].unique()

array(['Krum', 'Arlington', 'Dallas', 'Seven Points', 'Richardson',
       'Euless', 'McKinney', 'Warren', 'Fort Worth', 'Mathis',
       'Sherman Oaks', 'Farmers Branch', 'Carrollton', 'Cedar Hill',
       'Granbury', 'Lewisville', 'Quinlan', 'Colleyville', 'Austin',
       'Gainesville', 'Little Elm', 'Pilot Point', 'Irving', 'Corsicana',
       'Denison', 'Weatherford', 'Rockwall', 'Itasca', 'Garland',
       'Stephenville', 'Conroe', 'Plano', 'Forney', 'Ardmore',
       'North Richland Hills', 'Frisco', 'Mesquite', 'Houston',
       'Brookeville', 'Justin', 'Graham', 'Keller', 'Grand Prairie',
       'Sherman', 'Kaufman', 'Tyler', 'Greenville', 'Henrietta',
       'Farmersville', 'Ennis', 'Grapevine', 'Southlake', 'Rowlett',
       'Point', 'Benbrook', 'Madill', 'Pottsboro', 'Haslet', 'Tishomingo',
       'Allen', 'Bullard', 'Mineola', 'Waxahachie', 'Sunnyvale', 'Bryan',
       'Lavon', 'Alvarado', 'Mexia', 'Joshua', 'Saginaw', 'Commerce',
       'Van Alstyne', 'Flower Mound', 'Par