In [1]:
import re
import requests
import os
from datetime import datetime

from bs4 import BeautifulSoup, Comment
from bs4.element import Tag

import pandas as pd
from tqdm import tqdm

In [2]:
def get_description(breed_soup):
    try:
        description = breed_soup.find(
                'p', class_='breed-page__about__read-more__text'
        ).get_text().strip()
    except:
        description = ''
        
    
    # Removing weird characters
    # Probably not exhaustive
    description = description.replace(
        '\n', '').replace('\u200b', '').replace('\xa0', ' ')
    
    return description

In [3]:
def get_temperament(breed_soup):
    temperment_class = 'breed-page__intro__temperment'
    try:
        return breed_soup.find(
            'p', class_=temperment_class
        ).get_text().replace(' / ', ',')
    except:
        return ''

In [4]:
def general_regex(text, var, mul=1):
    reg = re.compile('(\d+\.?\d*)')
    results = reg.findall(text)
    numbers = [float(value) * mul for value in results]
    if len(numbers) == 1:
        numbers = numbers * 2
    elif len(numbers) == 0:
        numbers = [0, 0]
    return {
        'min_{}'.format(var): min(numbers),
        'max_{}'.format(var): max(numbers)
    }

In [5]:
# Define scraping functions for each attribute
def get_popularity(attribute):
    return attribute.replace('Ranks', '').split('of')[0].strip().split(':')[1]

def get_height(attribute):
    return general_regex(attribute, 'height')

def get_weight(attribute):
    return general_regex(attribute, 'weight') 

def get_expectancy(attribute):
    return general_regex(attribute, 'expectancy')

def get_group(attribute):
    return attribute.split(':')[1].strip()

# Create a dictionary mapping attribute names to their respective scraping functions
attr_function = {
    'akc_breed_popularity': get_popularity,
    'height': get_height,
    'weight': get_weight,
    'life_expectancy': get_expectancy,
    'group': get_group
}

# Function to extract attributes from a comment
def get_attributes_from_comment(comment):
    attributes_data = {}
    
    comment_content = BeautifulSoup(comment, 'html.parser')
    
    data_objects = comment_content.find_all('dataobject', type='document')
    
    for data_object in data_objects:
        attributes = data_object.find_all('attribute')
        
        for attribute in attributes:
            name = attribute.get('name')
            value = attribute.get_text(strip=True)
            
            # Check if the attribute name is in the attr_function dictionary
            if name in attr_function:
                scraping_function = attr_function[name]
                if(name in ['height', 'weight', 'life_expectancy']):
                    attr_dict = scraping_function(value)
                    for key in attr_dict.keys():
                        attributes_data[key] = attr_dict[key]
                    
                else:
                    attributes_data[name] = scraping_function(value)
    
    return attributes_data

In [6]:
def get_main_attributes(breed_soup):
    breed_attributes = []
    # Find all comments in the HTML
    comments = breed_soup.find_all(string=lambda text: isinstance(text, Comment))

    # Iterate through comments
    for comment in comments:
        comment_content = BeautifulSoup(comment, 'html.parser')

        if comment_content.find('pagemap'):
            breed_attributes = get_attributes_from_comment(comment)
    
    print('breed_attributes', breed_attributes)
    return breed_attributes

In [7]:
def get_care_info(breed_soup):
    titles = breed_soup.find_all(
        'h4', class_='bar-graph__title'
    )
    
    values = breed_soup.find_all(
        'div', class_='bar-graph__section'
    )
    
    categories = breed_soup.find_all(
        'div', class_='bar-graph__text'
    )
    
    care_dict = {}
    
    for (title, value, category) in zip (titles, values, categories):
        t = title.get_text().lower().replace(' ', '_')
        t = t[t.find('/') + 1:]
        care_dict[t + '_value'] = float(
            value['style'].split()[1].split('%')[0]
        ) / 100
        care_dict[t + '_category'] = category.get_text()
        
    return care_dict

In [8]:
# Working
def get_breed_characteristics(breed_soup):
    breed_characteristics = {}
    characteristic_classes = [tab_element.getText() for tab_element in breed_soup.find_all('li', class_='tabs__single-tab')]
    for characteristic_class in characteristic_classes:
        if('Traits' not in characteristic_class):
            characteristic_class_name = f'breed-page__traits__{characteristic_class.lower().replace(" ", "-")}'
            characteristics_div = breed_soup.find('div', id=characteristic_class_name)
            breed_traits = characteristics_div.find_all('div', class_='breed-trait-group__trait')
            breed_group_characteristics = {}
            for breed_trait in breed_traits:
                trait = breed_trait.find('h4', class_='accordion__header__text').getText()
                if trait in ['Coat Type', 'Coat Length']:
                    coatParentDivs = breed_trait.find_all(class_='breed-trait-score__choice--selected')
                    coatNames = []
                    for coatParentDiv in coatParentDivs:
                        span_text = coatParentDiv.find('span').text
                        coatNames.append(span_text)
                    trait_info = ",".join(coatNames)
                else:
                    trait_info = len(breed_trait.find_all('div', class_='breed-trait-score__score-unit breed-trait-score__score-unit--filled'))
                breed_group_characteristics[trait] = trait_info
                breed_characteristics[trait] = trait_info
    return breed_characteristics

In [9]:
# Images
def get_breed_images(breed_soup):
    # Extract og:image URL
        og_image_tag = breed_soup.find('meta', attrs={'property': 'og:image'})
        if og_image_tag:
            return {'images': [og_image_tag['content']]}
        else:
            return {'images': []}

In [10]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

class Breed:
    def __init__(self, breed_url, breed_name):
        self.url = breed_url
        self.name = breed_name
        
        # Set up a headless Chrome browser
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')  # Run in headless mode (no GUI)
        driver = webdriver.Chrome(options=options)

        # Load the web page
        driver.get(breed_url)

        # Get the page source after content has loaded
        page_source = driver.page_source

        # Create a BeautifulSoup object from the page source
        breed_soup = BeautifulSoup(page_source, 'html.parser')

        # Scrape content as needed
        self.breed_info = {}
        self.breed_info['breed'] = self.name
        self.breed_info['description'] = get_description(breed_soup)
        self.breed_info['temperament'] = get_temperament(breed_soup)
        self.breed_info.update(get_main_attributes(breed_soup))
        self.breed_info.update(get_care_info(breed_soup))
        self.breed_info.update(get_breed_characteristics(breed_soup))
        self.breed_info.update(get_breed_images(breed_soup))

        # Close the browser
        driver.quit()

    def get_breed_info(self):
        return self.breed_info


In [11]:
import requests
from bs4 import BeautifulSoup
from bs4 import Tag
from tqdm import tqdm  # Import tqdm for the progress bar

def get_data():
    page = requests.get('https://www.akc.org/dog-breeds/')
    soup = BeautifulSoup(page.content, 'html.parser')
    
    breed_select = soup.find('select', id='breed-search')

    breeds = []
    
    for tag in breed_select.children:
        if isinstance(tag, Tag):
            if 'value' in tag.attrs and tag['value']:
                breeds.append(tag)

    print(len(breeds))
                
    breed_dict = {}
    
    # Use tqdm to create a progress bar
    for breed in tqdm(breeds[250:],desc='scraping Breeds'):
        breed_name = breed.get_text()
        breed_url = breed['value']
        breed_info = Breed(breed_url, breed_name).get_breed_info()
        breed_dict[breed_name] = breed_info
    
#     print('breed_dict', breed_dict)
    
    return breed_dict

# Define the Breed class and its methods here if not already defined

# Call get_data() to start the scraping process

In [12]:
breed_dict = get_data()

288


scraping Breeds:   0%|                                                                          | 0/50 [00:00<?, ?it/s]

breed_attributes {'akc_breed_popularity': '  181', 'min_height': 17.0, 'max_height': 20.0, 'min_weight': 30.0, 'max_weight': 50.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Herding Group'}


scraping Breeds:   2%|█▎                                                                | 1/50 [00:10<08:24, 10.30s/it]

breed_attributes {'akc_breed_popularity': '  23', 'min_height': 6.0, 'max_height': 7.0, 'min_weight': 3.0, 'max_weight': 7.0, 'min_expectancy': 12.0, 'max_expectancy': 16.0, 'group': 'Toy Group'}


scraping Breeds:   4%|██▋                                                               | 2/50 [00:23<09:31, 11.90s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 20.0, 'max_height': 23.0, 'min_weight': 33.0, 'max_weight': 48.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Foundation Stock Service'}


scraping Breeds:   6%|███▉                                                              | 3/50 [00:33<08:52, 11.33s/it]

breed_attributes {'akc_breed_popularity': '  7', 'min_height': 10.0, 'max_height': 15.0, 'min_weight': 10.0, 'max_weight': 15.0, 'min_expectancy': 10.0, 'max_expectancy': 18.0, 'group': 'Non-Sporting Group'}


scraping Breeds:   8%|█████▎                                                            | 4/50 [00:48<09:36, 12.53s/it]

breed_attributes {'akc_breed_popularity': '  7', 'min_height': 15.0, 'max_height': 15.0, 'min_weight': 40.0, 'max_weight': 70.0, 'min_expectancy': 10.0, 'max_expectancy': 18.0, 'group': 'Non-Sporting Group'}


scraping Breeds:  10%|██████▌                                                           | 5/50 [00:59<08:56, 11.92s/it]

breed_attributes {'akc_breed_popularity': '  7', 'min_height': 10.0, 'max_height': 10.0, 'min_weight': 4.0, 'max_weight': 6.0, 'min_expectancy': 10.0, 'max_expectancy': 18.0, 'group': 'Toy Group'}


scraping Breeds:  12%|███████▉                                                          | 6/50 [01:09<08:25, 11.48s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 22.0, 'max_height': 23.0, 'min_weight': 55.0, 'max_weight': 62.0, 'min_expectancy': 12.0, 'max_expectancy': 13.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  14%|█████████▏                                                        | 7/50 [01:24<08:55, 12.44s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 16.0, 'max_height': 28.0, 'min_weight': 35.0, 'max_weight': 66.0, 'min_expectancy': 10.0, 'max_expectancy': 15.0, 'group': 'Miscellaneous Class'}


scraping Breeds:  16%|██████████▌                                                       | 8/50 [01:34<08:13, 11.74s/it]

breed_attributes {'akc_breed_popularity': '  171', 'min_height': 8.0, 'max_height': 12.0, 'min_weight': 9.0, 'max_weight': 13.0, 'min_expectancy': 12.0, 'max_expectancy': 15.0, 'group': 'Hound Group'}


scraping Breeds:  18%|███████████▉                                                      | 9/50 [01:45<07:50, 11.47s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 20.5, 'max_height': 22.0, 'min_weight': 35.0, 'max_weight': 59.0, 'min_expectancy': 14.0, 'max_expectancy': 14.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  20%|█████████████                                                    | 10/50 [01:56<07:28, 11.22s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 16.5, 'max_height': 21.5, 'min_weight': 37.5, 'max_weight': 59.0, 'min_expectancy': 12.0, 'max_expectancy': 13.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  22%|██████████████▎                                                  | 11/50 [02:07<07:20, 11.29s/it]

breed_attributes {'akc_breed_popularity': '  46', 'min_height': 17.0, 'max_height': 23.0, 'min_weight': 35.0, 'max_weight': 60.0, 'min_expectancy': 11.0, 'max_expectancy': 13.0, 'group': 'Working Group'}


scraping Breeds:  24%|███████████████▌                                                 | 12/50 [02:24<08:18, 13.13s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 22.0, 'max_height': 26.0, 'min_weight': 45.0, 'max_weight': 70.0, 'min_expectancy': 14.0, 'max_expectancy': 14.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  26%|████████████████▉                                                | 13/50 [02:36<07:47, 12.64s/it]

breed_attributes {'akc_breed_popularity': '  35', 'min_height': 10.0, 'max_height': 13.0, 'min_weight': 14.0, 'max_weight': 18.0, 'min_expectancy': 13.0, 'max_expectancy': 15.0, 'group': 'Toy Group'}


scraping Breeds:  28%|██████████████████▏                                              | 14/50 [02:47<07:21, 12.25s/it]

breed_attributes {'akc_breed_popularity': '  161', 'min_height': 16.0, 'max_height': 17.0, 'min_weight': 25.0, 'max_weight': 35.0, 'min_expectancy': 10.0, 'max_expectancy': 15.0, 'group': 'Herding Group'}


scraping Breeds:  30%|███████████████████▌                                             | 15/50 [02:58<06:57, 11.93s/it]

breed_attributes {'akc_breed_popularity': '  140', 'min_height': 15.0, 'max_height': 18.5, 'min_weight': 22.0, 'max_weight': 29.0, 'min_expectancy': 12.0, 'max_expectancy': 13.0, 'group': 'Herding Group'}


scraping Breeds:  32%|████████████████████▊                                            | 16/50 [03:09<06:33, 11.57s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 25.0, 'max_height': 31.0, 'min_weight': 120.0, 'max_weight': 240.0, 'min_expectancy': 10.0, 'max_expectancy': 13.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  34%|██████████████████████                                           | 17/50 [03:20<06:12, 11.28s/it]

breed_attributes {'akc_breed_popularity': '  190', 'min_height': 15.0, 'max_height': 21.0, 'min_weight': 15.0, 'max_weight': 30.0, 'min_expectancy': 17.0, 'max_expectancy': 19.0, 'group': 'Herding Group'}


scraping Breeds:  36%|███████████████████████▍                                         | 18/50 [03:45<08:18, 15.57s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 25.0, 'max_height': 29.0, 'min_weight': 77.0, 'max_weight': 132.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  38%|████████████████████████▋                                        | 19/50 [03:56<07:20, 14.22s/it]

breed_attributes {'akc_breed_popularity': '  85', 'min_height': 10.0, 'max_height': 18.0, 'min_weight': 10.0, 'max_weight': 25.0, 'min_expectancy': 12.0, 'max_expectancy': 18.0, 'group': 'Terrier Group'}


scraping Breeds:  40%|██████████████████████████                                       | 20/50 [04:09<06:56, 13.89s/it]

breed_attributes {'akc_breed_popularity': '  158', 'min_height': 21.0, 'max_height': 27.0, 'min_weight': 45.0, 'max_weight': 70.0, 'min_expectancy': 12.0, 'max_expectancy': 15.0, 'group': 'Hound Group'}


scraping Breeds:  42%|███████████████████████████▎                                     | 21/50 [04:20<06:14, 12.92s/it]

breed_attributes {'akc_breed_popularity': '  41', 'min_height': 24.0, 'max_height': 27.0, 'min_weight': 70.0, 'max_weight': 85.0, 'min_expectancy': 10.0, 'max_expectancy': 12.0, 'group': 'Hound Group'}


scraping Breeds:  44%|████████████████████████████▌                                    | 22/50 [04:31<05:42, 12.23s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 23.0, 'max_height': 29.0, 'min_weight': 70.0, 'max_weight': 100.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  46%|█████████████████████████████▉                                   | 23/50 [04:41<05:13, 11.61s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 22.5, 'max_height': 29.0, 'min_weight': 100.0, 'max_weight': 130.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  48%|███████████████████████████████▏                                 | 24/50 [04:54<05:13, 12.05s/it]

breed_attributes {'akc_breed_popularity': '  7', 'min_height': 22.0, 'max_height': 27.0, 'min_weight': 80.0, 'max_weight': 135.0, 'min_expectancy': 9.0, 'max_expectancy': 10.0, 'group': 'Working Group'}


scraping Breeds:  50%|████████████████████████████████▌                                | 25/50 [05:06<05:03, 12.14s/it]

breed_attributes {'akc_breed_popularity': '  73', 'min_height': 10.0, 'max_height': 12.0, 'min_weight': 9.0, 'max_weight': 15.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Terrier Group'}


scraping Breeds:  52%|█████████████████████████████████▊                               | 26/50 [05:17<04:40, 11.68s/it]

breed_attributes {'akc_breed_popularity': '  144', 'min_height': 8.0, 'max_height': 11.0, 'min_weight': 6.5, 'max_weight': 6.5, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Toy Group'}


scraping Breeds:  54%|███████████████████████████████████                              | 27/50 [05:27<04:19, 11.28s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 11.0, 'max_height': 11.0, 'min_weight': 6.0, 'max_weight': 9.0, 'min_expectancy': 14.0, 'max_expectancy': 20.0, 'group': 'Miscellaneous Class'}


scraping Breeds:  56%|████████████████████████████████████▍                            | 28/50 [05:38<04:02, 11.04s/it]

breed_attributes {'akc_breed_popularity': '  56', 'min_height': 26.0, 'max_height': 30.0, 'min_weight': 120.0, 'max_weight': 180.0, 'min_expectancy': 8.0, 'max_expectancy': 10.0, 'group': 'Working Group'}


scraping Breeds:  58%|█████████████████████████████████████▋                           | 29/50 [05:48<03:48, 10.86s/it]

breed_attributes {'akc_breed_popularity': '  135', 'min_height': 23.0, 'max_height': 28.0, 'min_weight': 40.0, 'max_weight': 65.0, 'min_expectancy': 10.0, 'max_expectancy': 17.0, 'group': 'Hound Group'}


scraping Breeds:  60%|███████████████████████████████████████                          | 30/50 [05:59<03:37, 10.87s/it]

breed_attributes {'akc_breed_popularity': '  53', 'min_height': 19.0, 'max_height': 23.5, 'min_weight': 35.0, 'max_weight': 65.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Working Group'}


scraping Breeds:  62%|████████████████████████████████████████▎                        | 31/50 [06:10<03:24, 10.77s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 16.0, 'max_height': 20.0, 'min_weight': 26.0, 'max_weight': 55.0, 'min_expectancy': 12.0, 'max_expectancy': 15.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  64%|█████████████████████████████████████████▌                       | 32/50 [06:22<03:21, 11.18s/it]

breed_attributes {'akc_breed_popularity': '  115', 'min_height': 10.0, 'max_height': 13.0, 'min_weight': 10.0, 'max_weight': 16.0, 'min_expectancy': 12.0, 'max_expectancy': 16.0, 'group': 'Non-Sporting Group'}


scraping Breeds:  66%|██████████████████████████████████████████▉                      | 33/50 [06:32<03:06, 10.97s/it]

breed_attributes {'akc_breed_popularity': '  155', 'min_height': 28.0, 'max_height': 32.0, 'min_weight': 75.0, 'max_weight': 110.0, 'min_expectancy': 8.0, 'max_expectancy': 11.0, 'group': 'Hound Group'}


scraping Breeds:  68%|████████████████████████████████████████████▏                    | 34/50 [06:43<02:53, 10.86s/it]

breed_attributes {'akc_breed_popularity': '  60', 'min_height': 10.0, 'max_height': 10.0, 'min_weight': 18.0, 'max_weight': 22.0, 'min_expectancy': 12.0, 'max_expectancy': 12.0, 'group': 'Terrier Group'}


scraping Breeds:  70%|█████████████████████████████████████████████▌                   | 35/50 [06:53<02:41, 10.75s/it]

breed_attributes {'akc_breed_popularity': '  164', 'min_height': 10.5, 'max_height': 10.5, 'min_weight': 23.0, 'max_weight': 24.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Terrier Group'}


scraping Breeds:  72%|██████████████████████████████████████████████▊                  | 36/50 [07:04<02:29, 10.67s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 19.0, 'max_height': 24.5, 'min_weight': 39.0, 'max_weight': 62.0, 'min_expectancy': 11.0, 'max_expectancy': 13.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  74%|████████████████████████████████████████████████                 | 37/50 [07:21<02:43, 12.58s/it]

breed_attributes {'akc_breed_popularity': '  27', 'min_height': 13.0, 'max_height': 16.0, 'min_weight': 15.0, 'max_weight': 25.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Herding Group'}


scraping Breeds:  76%|█████████████████████████████████████████████████▍               | 38/50 [07:43<03:05, 15.45s/it]

breed_attributes {'akc_breed_popularity': '  43', 'min_height': 13.5, 'max_height': 16.5, 'min_weight': 17.0, 'max_weight': 23.0, 'min_expectancy': 13.0, 'max_expectancy': 16.0, 'group': 'Non-Sporting Group'}


scraping Breeds:  78%|██████████████████████████████████████████████████▋              | 39/50 [07:54<02:34, 14.05s/it]

breed_attributes {'akc_breed_popularity': '  20', 'min_height': 9.0, 'max_height': 10.5, 'min_weight': 9.0, 'max_weight': 16.0, 'min_expectancy': 10.0, 'max_expectancy': 18.0, 'group': 'Toy Group'}


scraping Breeds:  80%|████████████████████████████████████████████████████             | 40/50 [08:04<02:10, 13.05s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 17.0, 'max_height': 22.0, 'min_weight': 35.0, 'max_weight': 55.0, 'min_expectancy': 10.0, 'max_expectancy': 12.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  82%|█████████████████████████████████████████████████████▎           | 41/50 [08:15<01:50, 12.24s/it]

breed_attributes {'akc_breed_popularity': '  21', 'min_height': 20.0, 'max_height': 23.5, 'min_weight': 35.0, 'max_weight': 60.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Working Group'}


scraping Breeds:  84%|██████████████████████████████████████████████████████▌          | 42/50 [08:26<01:34, 11.87s/it]

breed_attributes {'akc_breed_popularity': '  122', 'min_height': 9.0, 'max_height': 10.0, 'min_weight': 10.0, 'max_weight': 10.0, 'min_expectancy': 13.0, 'max_expectancy': 15.0, 'group': 'Toy Group'}


scraping Breeds:  86%|███████████████████████████████████████████████████████▉         | 43/50 [08:36<01:19, 11.41s/it]

breed_attributes {'akc_breed_popularity': '  189', 'min_height': 9.5, 'max_height': 10.0, 'min_weight': 35.0, 'max_weight': 45.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Terrier Group'}


scraping Breeds:  88%|█████████████████████████████████████████████████████████▏       | 44/50 [08:47<01:06, 11.12s/it]

breed_attributes {'akc_breed_popularity': '  197', 'min_height': 24.0, 'max_height': 29.0, 'min_weight': 35.0, 'max_weight': 50.0, 'min_expectancy': 10.0, 'max_expectancy': 15.0, 'group': 'Hound Group'}


scraping Breeds:  90%|██████████████████████████████████████████████████████████▌      | 45/50 [08:57<00:54, 10.89s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 22.5, 'max_height': 26.5, 'min_weight': 50.0, 'max_weight': 65.0, 'min_expectancy': 12.0, 'max_expectancy': 15.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  92%|███████████████████████████████████████████████████████████▊     | 46/50 [09:08<00:43, 10.82s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 23.0, 'max_height': 27.5, 'min_weight': 68.0, 'max_weight': 97.0, 'min_expectancy': 11.0, 'max_expectancy': 13.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  94%|█████████████████████████████████████████████████████████████    | 47/50 [09:18<00:32, 10.72s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 16.0, 'max_height': 20.0, 'min_weight': 33.0, 'max_weight': 44.0, 'min_expectancy': 12.0, 'max_expectancy': 13.0, 'group': 'Foundation Stock Service'}


scraping Breeds:  96%|██████████████████████████████████████████████████████████████▍  | 48/50 [09:29<00:21, 10.64s/it]

breed_attributes {'akc_breed_popularity': '', 'min_height': 20.5, 'max_height': 21.0, 'min_weight': 40.0, 'max_weight': 60.0, 'min_expectancy': 12.0, 'max_expectancy': 14.0, 'group': 'Miscellaneous Class'}


scraping Breeds:  98%|███████████████████████████████████████████████████████████████▋ | 49/50 [09:40<00:10, 10.74s/it]

breed_attributes {'akc_breed_popularity': '  141', 'min_height': 15.0, 'max_height': 15.0, 'min_weight': 15.0, 'max_weight': 18.0, 'min_expectancy': 12.0, 'max_expectancy': 15.0, 'group': 'Terrier Group'}


scraping Breeds: 100%|█████████████████████████████████████████████████████████████████| 50/50 [09:51<00:00, 11.82s/it]


In [13]:
breed_df = pd.DataFrame.from_dict(breed_dict, orient='index')

In [14]:
breed_df

Unnamed: 0,breed,description,temperament,akc_breed_popularity,min_height,max_height,min_weight,max_weight,min_expectancy,max_expectancy,...,Coat Length,Openness To Strangers,Playfulness Level,Watchdog/Protective Nature,Adaptability Level,Trainability Level,Energy Level,Barking Level,Mental Stimulation Needs,images
Polish Lowland Sheepdog,Polish Lowland Sheepdog,A shaggy-coated herding dog who thrives on exe...,"confident,lively,clever",181.0,17.0,20.0,30.0,50.0,12.0,14.0,...,Long,3,4,5,5,4,3,4,4,[https://www.akc.org/wp-content/uploads/2017/1...
Pomeranian,Pomeranian,"The tiny Pomeranian, long a favorite of royals...","inquisitive,lively,bold",23.0,6.0,7.0,3.0,7.0,12.0,16.0,...,Long,3,3,4,4,3,3,4,3,[https://www.akc.org/wp-content/uploads/2017/1...
Pont-Audemer Spaniel,Pont-Audemer Spaniel,The Pont-Audemer Spaniel is used to hunt a var...,"affectionate,gentle,fun-loving",,20.0,23.0,33.0,48.0,12.0,14.0,...,Medium,3,4,1,5,4,3,1,3,[https://s3.amazonaws.com/cdn-origin-etr.akc.o...
Poodle (Miniature),Poodle (Miniature),"Whether Standard, Miniature, or Toy, and eithe...","active,proud,very smart",7.0,10.0,15.0,10.0,15.0,10.0,18.0,...,Long,5,5,3,4,5,4,4,5,[https://www.akc.org/wp-content/uploads/2017/1...
Poodle (Standard),Poodle (Standard),"Whether Standard, Miniature, or Toy, and eithe...","active,proud,very smart",7.0,15.0,15.0,40.0,70.0,10.0,18.0,...,Long,5,5,5,4,5,4,4,5,[https://s3.amazonaws.com/cdn-origin-etr.akc.o...
Poodle (Toy),Poodle (Toy),"Despite his diminutive size, the Toy Poodle st...","intelligent,agile,self-confident",7.0,10.0,10.0,4.0,6.0,10.0,18.0,...,Long,5,5,3,4,5,4,4,5,[https://www.akc.org/wp-content/uploads/2018/0...
Porcelaine,Porcelaine,"The Porcelaine is elegant, energetic, and a fi...","independent,energetic,powerful",,22.0,23.0,55.0,62.0,12.0,13.0,...,Short,5,5,3,4,5,4,2,4,[https://www.akc.org/wp-content/uploads/2017/1...
Portuguese Podengo,Portuguese Podengo,"Portuguese Podengos are lively, agile, playful...","alert,independent,intelligent",,16.0,28.0,35.0,66.0,10.0,15.0,...,Medium,3,4,3,4,3,4,3,4,[https://www.akc.org/wp-content/uploads/2017/1...
Portuguese Podengo Pequeno,Portuguese Podengo Pequeno,"Classified as a hound, the alert and playful l...","playful,charming,lively",171.0,8.0,12.0,9.0,13.0,12.0,15.0,...,Short,3,4,3,4,3,4,3,4,[https://www.akc.org/wp-content/uploads/2017/1...
Portuguese Pointer,Portuguese Pointer,The Portuguese Pointer is a very old hunting b...,"energetic,affectionate,smart",,20.5,22.0,35.0,59.0,14.0,14.0,...,Short,4,4,3,4,5,5,3,5,[https://www.akc.org/wp-content/uploads/2017/1...


In [15]:
excel_file_path = 'to_excel.xlsx'

In [16]:
breed_df.to_excel(excel_file_path, index=False)