# Setup

### Import Dependencies

In [1]:
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup as bs
import re
import requests
import time
import pandas as pd
import unicodedata
import random

### Set URLs

In [2]:
# Root address shared by every race-results page.
base_url = "https://results.nyrr.org/event/"

# Event code of the race to scrape. To target another race, replace the value
# below with one of these codes:
#   "M2019"   - 2019 NYC Marathon. includes gun time and splits
#   "20WH5K"  - 2020 Washington Heights 5K. 4942 finishers. yes to gun time, no to splits
#   "a70414"  - 2007 Brooklyn Half. 4853 finishers. no gun time, no splits
#   "19nyc60" - 2019 NYC 60K. 375 finishers, gun time, no splits
#   "1860k"   - 2018 NYC 60K. 360 finishers, gun time, no splits
#   "1760K"   - 2017 NYC 60K. 358 finishers (plus one handcycle), gun time, no splits
#   "60K16"   - 2016 NYC 60K. 366 finishers, gun time, no splits. 2 runners do not have an address.
race_code = "M2019"

# Result-page prefix for the chosen race; the scraping loop appends a bib number to it.
race_url = f"{base_url}{race_code}/result/"

### Set File Locations

In [3]:
# Set path for chromedriver.
# This must be a mapping (not a set) so it can be unpacked as keyword arguments,
# e.g. Browser('chrome', **executable_path).
executable_path = {'executable_path': 'chromedriver.exe'}

# Set paths for outputs: directory and base file name of the finishers CSV ...
output_csv_path = "../00_data/raw_data/"
output_csv_name_base = "finishers"

# ... and of the scraping-log CSV.
log_csv_path = "../00_data/raw_data/"
log_csv_name_base = "scraping_log"

### Set Scraping Parameters

In [4]:
# Inclusive bounds (lowest, highest) of the bib-number range to scrape.
low_bib, high_bib = 72001, 75000

# Extra bib numbers to scrape in addition to the range above; empty when there are none.
additional_scraping_targets = list()


# Scraping

### Define scraping function

In [5]:
def finisher_scraper(soup):
    """Extract one finisher's details from a parsed result page.

    Args:
        soup: The parsed result page. It must provide ``find`` and ``find_all``
            the way a BeautifulSoup document does.

    Returns:
        A dict describing the runner. ``name`` and ``country`` are always set;
        ``geo_subregion``, ``gender``, ``age``, ``age_group`` and
        ``country_group`` are set when the page provides them. Every other
        key is derived from a label found on the page (lowercased, spaces
        replaced by underscores), so the key set varies from race to race and
        runner to runner.

    Raises:
        AttributeError: If the name/address header element is missing.
        IndexError: If the header has fewer than two children, or if a
            form group has more labels than values.
    """
    # Compile every pattern once per call instead of once per loop iteration.
    gender_age_pattern = re.compile('[MF][1-9][0-9]?')
    parenthesized_pattern = re.compile(r'\(([^)]*)\)')
    form_group_pattern = re.compile('.*form-group-item.*')

    finisher_dict = {}

    # block 1
    thangs = soup.find(class_='cursor-pointer results-link').findChildren()
    finisher_dict['name'] = thangs[0].text

    # The address is presented as a subregion, then a pipe "|", then a country code.
    # For US addresses, the subregion is presented as town followed by state abbreviation.
    # Some runners don't have a full address listed, so carefully parse out what is there.
    # Assume that the most basic information would be the runner's country (i.e. country of residence).
    # Note that these are ISO 3166-1 country codes, e.g. Germany = 'DEU'.
    # These items do not have labels (e.g. 'country') associated with them, so
    # the keys are hardcoded here.
    address_parts = thangs[1].text.split('|')
    if len(address_parts) > 1:
        finisher_dict['geo_subregion'] = address_parts[0].strip()
        finisher_dict['country'] = address_parts[1].strip()
    else:
        finisher_dict['country'] = address_parts[0].strip()

    for thing in soup.find_all('strong'):
        sibling = thing.next_sibling

        # Only a text node can carry the value. Skip a <strong> that is followed
        # by nothing or by another element instead of failing the whole page.
        if not sibling or not isinstance(sibling, str):
            continue
        value = sibling.strip()

        # Look for gender and age (e.g. 'F34'), and record those separately.
        if gender_age_pattern.match(value):
            finisher_dict['gender'] = value[0]
            finisher_dict['age'] = value[1:]
            continue

        # I prefer the keys and columns be lowercase without spaces, so make those changes
        # while reading in the keys.
        key = thing.text.strip().lower().replace(" ", "_").replace("/", "_and_")
        finisher_dict[key] = value

    # block 2 and block 3
    # (Per the original author's note, slicing this result to its first four
    # entries would restrict the scrape to block 2 only.)
    for spoonful in soup.find_all("div", {"class": form_group_pattern}):

        # The data found here can vary from race to race and runner to runner. These items have labels
        # associated with them. To be flexible, we read the label itself and use it as the dict key.
        # Not every runner will have a full set of label-value pairs, and the pairs vary from race to race.
        # For example, a runner's bib might not get recorded at a particular checkpoint, and a half marathon
        # would not have as many checkpoints as a marathon.
        labels = spoonful.find_all('label')
        values = spoonful.find_all('span', {'class': 'label-value ng-binding'})

        # Step through all the labels and add each key(label)-value pair to the dict.
        # Values are looked up by position on purpose: a label without a matching
        # value raises IndexError rather than silently producing a partial record.
        for position, label_tag in enumerate(labels):

            # There was a '\xa0' coming in as part of the 'Place Country' label;
            # unicodedata.normalize replaces that with a plain space.
            label = unicodedata.normalize("NFKD", label_tag.text)

            # If we find the place in age-group, add the age group to the dictionary.
            # NOTE: the hyphen in the string being matched is not the ASCII '-'.
            if label.endswith("Place Age‐Group"):
                group_match = parenthesized_pattern.search(label)
                if group_match:
                    finisher_dict["age_group"] = group_match.group(1)
                label = "Place Age-Group"

            # If we find the place in country, add the country group (i.e. country of citizenship)
            # to the dictionary. Note that this is not necessarily the same as the runner's address.
            # This country code is in IOC format, e.g. Germany = 'GER'.
            elif label.endswith("Place Country"):
                group_match = parenthesized_pattern.search(label)
                if group_match:
                    finisher_dict["country_group"] = group_match.group(1)
                label = "Place Country"

            # Add pairs to dict. Make sure keys are lowercase without spaces.
            finisher_dict[label.strip().lower().replace(" ", "_")] = values[position].text

    return finisher_dict
    

In [6]:
def scraping_loop(race_url, scrape_list):
    """Visit the result page of every bib in ``scrape_list`` and scrape the finishers.

    Each bib is classified as one of three outcomes:

    * ``success`` - the page loaded and yielded at least a bib and a name;
    * ``DNF`` - the site redirected away from the requested URL;
    * ``error`` - the page loaded but could not be scraped.

    A running total is printed after every attempt and a summary at the end.

    Args:
        race_url: URL prefix of the race's result pages; the bib number is
            appended to it.
        scrape_list: Sized collection of bib numbers to look up.

    Returns:
        A four-item list ``[finishers, dnf_list, error_list, scraping_log]``:
        the scraped finisher dicts, the bibs logged as DNF, the bibs logged as
        errors, and one ``{'bib': ..., 'outcome': ...}`` dict per attempt.
    """
    # These lists will be returned.
    finishers = []
    dnf_list = []
    scraping_log = []
    error_list = []

    # These counters are used for reporting progress.
    attempt_counter = 0
    hit_counter = 0
    miss_counter = 0
    error_counter = 0

    total = len(scrape_list)

    # Only launch a browser when there is something to scrape.
    browser = Browser('chrome', headless=False) if total else None

    try:
        for bib_number in scrape_list:
            url = race_url + str(bib_number)
            browser.visit(url)
            # We need a short sleep period to give the site a chance to redirect an invalid bib number.
            # We also don't need to beat on the website too hard.
            time.sleep(3)
            attempt_counter += 1

            if browser.url != url:
                # If the url resulted in a redirect, then that bib number never crossed the finish line.
                # Log it as DNF. Note that the bib may never have been issued, or the runner may have
                # dropped out 50m from the finish. All we know is that the bib was not logged as a finisher.
                scraping_log.append({'bib': bib_number, 'outcome': 'DNF'})
                dnf_list.append(bib_number)
                miss_counter += 1
            else:
                # The url did not redirect, so the bib number crossed the finish line. Scrape it.
                try:
                    soup = bs(browser.html, "html.parser")
                    new_result = finisher_scraper(soup)
                except Exception as exc:
                    # Parsing or scraping failed. Catch Exception rather than everything, so that
                    # a keyboard interrupt still stops a long-running scrape.
                    print(f"Bib {bib_number}: scrape failed with {exc!r}; logging as error.")
                    new_result = None

                # We're being intentionally flexible about what data we gather,
                # but bib number and name are the bare minimum.
                if new_result is not None and new_result.get('bib') and new_result.get('name'):
                    finishers.append(new_result)
                    scraping_log.append({'bib': bib_number, 'outcome': 'success'})
                    hit_counter += 1
                else:
                    # The url was good, but scraping failed or the minimum fields are missing.
                    scraping_log.append({'bib': bib_number, 'outcome': 'error'})
                    error_list.append(bib_number)
                    error_counter += 1

            # Report running totals of the scraping progress.
            print(f"Attempted {attempt_counter} of {total}: Found {hit_counter} finishers ({(hit_counter/attempt_counter):.2%}), {miss_counter} DNFs, and {error_counter} errors.")
    finally:
        # Always release the browser, even if a page visit raised.
        if browser is not None:
            browser.quit()

    print('Loop complete')
    # Guard against an empty scrape list, which would otherwise divide by zero.
    hit_rate = hit_counter / attempt_counter if attempt_counter else 0.0
    print(f"{attempt_counter} attempts yielded {hit_counter} hits ({hit_rate:.2%}) with {error_counter} errors.")

    return [finishers, dnf_list, error_list, scraping_log]

### Scraping Loop

In [7]:
# Every bib in the inclusive range low_bib..high_bib, followed by any additional
# scraping targets.
scrape_list = [*range(low_bib, high_bib + 1), *additional_scraping_targets]

# Randomize the order. This helps us get an idea of how 'rich' the specified bib
# range is after only a few dozen scrapes.
random.shuffle(scrape_list)

In [8]:
# First pass over the whole list. The result is [finishers, dnf_list, error_list, scraping_log].
scrape_results = scraping_loop(race_url, scrape_list)
runners, scraping_log = scrape_results[0], scrape_results[3]

# Second pass: give every bib that errored on the first pass one more try, and
# fold the outcome into the first pass's finishers and log.
if scrape_results[2]:
    error_list = scrape_results[2]
    second_scrape = scraping_loop(race_url, error_list)
    runners += second_scrape[0]
    scraping_log += second_scrape[3]

Attempted 1 of 3000: Found 1 finishers (100.00%), 0 DNFs, and 0 errors.
Attempted 2 of 3000: Found 1 finishers (50.00%), 1 DNFs, and 0 errors.
Attempted 3 of 3000: Found 2 finishers (66.67%), 1 DNFs, and 0 errors.
Attempted 4 of 3000: Found 2 finishers (50.00%), 2 DNFs, and 0 errors.
Attempted 5 of 3000: Found 2 finishers (40.00%), 3 DNFs, and 0 errors.
Attempted 6 of 3000: Found 2 finishers (33.33%), 4 DNFs, and 0 errors.
Attempted 7 of 3000: Found 2 finishers (28.57%), 5 DNFs, and 0 errors.
Attempted 8 of 3000: Found 3 finishers (37.50%), 5 DNFs, and 0 errors.
Attempted 9 of 3000: Found 3 finishers (33.33%), 6 DNFs, and 0 errors.
Attempted 10 of 3000: Found 3 finishers (30.00%), 7 DNFs, and 0 errors.
Attempted 11 of 3000: Found 3 finishers (27.27%), 8 DNFs, and 0 errors.
Attempted 12 of 3000: Found 3 finishers (25.00%), 9 DNFs, and 0 errors.
Attempted 13 of 3000: Found 4 finishers (30.77%), 9 DNFs, and 0 errors.
Attempted 14 of 3000: Found 4 finishers (28.57%), 10 DNFs, and 0 errors.

Attempted 113 of 3000: Found 19 finishers (16.81%), 94 DNFs, and 0 errors.
Attempted 114 of 3000: Found 19 finishers (16.67%), 95 DNFs, and 0 errors.
Attempted 115 of 3000: Found 19 finishers (16.52%), 96 DNFs, and 0 errors.
Attempted 116 of 3000: Found 19 finishers (16.38%), 97 DNFs, and 0 errors.
Attempted 117 of 3000: Found 20 finishers (17.09%), 97 DNFs, and 0 errors.
Attempted 118 of 3000: Found 20 finishers (16.95%), 98 DNFs, and 0 errors.
Attempted 119 of 3000: Found 21 finishers (17.65%), 98 DNFs, and 0 errors.
Attempted 120 of 3000: Found 21 finishers (17.50%), 99 DNFs, and 0 errors.
Attempted 121 of 3000: Found 21 finishers (17.36%), 100 DNFs, and 0 errors.
Attempted 122 of 3000: Found 21 finishers (17.21%), 101 DNFs, and 0 errors.
Attempted 123 of 3000: Found 21 finishers (17.07%), 102 DNFs, and 0 errors.
Attempted 124 of 3000: Found 22 finishers (17.74%), 102 DNFs, and 0 errors.
Attempted 125 of 3000: Found 22 finishers (17.60%), 103 DNFs, and 0 errors.
Attempted 126 of 300

Attempted 221 of 3000: Found 43 finishers (19.46%), 178 DNFs, and 0 errors.
Attempted 222 of 3000: Found 44 finishers (19.82%), 178 DNFs, and 0 errors.
Attempted 223 of 3000: Found 44 finishers (19.73%), 179 DNFs, and 0 errors.
Attempted 224 of 3000: Found 44 finishers (19.64%), 180 DNFs, and 0 errors.
Attempted 225 of 3000: Found 44 finishers (19.56%), 181 DNFs, and 0 errors.
Attempted 226 of 3000: Found 44 finishers (19.47%), 182 DNFs, and 0 errors.
Attempted 227 of 3000: Found 45 finishers (19.82%), 182 DNFs, and 0 errors.
Attempted 228 of 3000: Found 45 finishers (19.74%), 183 DNFs, and 0 errors.
Attempted 229 of 3000: Found 45 finishers (19.65%), 184 DNFs, and 0 errors.
Attempted 230 of 3000: Found 45 finishers (19.57%), 185 DNFs, and 0 errors.
Attempted 231 of 3000: Found 45 finishers (19.48%), 186 DNFs, and 0 errors.
Attempted 232 of 3000: Found 45 finishers (19.40%), 187 DNFs, and 0 errors.
Attempted 233 of 3000: Found 45 finishers (19.31%), 188 DNFs, and 0 errors.
Attempted 23

Attempted 329 of 3000: Found 61 finishers (18.54%), 268 DNFs, and 0 errors.
Attempted 330 of 3000: Found 61 finishers (18.48%), 269 DNFs, and 0 errors.
Attempted 331 of 3000: Found 61 finishers (18.43%), 270 DNFs, and 0 errors.
Attempted 332 of 3000: Found 61 finishers (18.37%), 271 DNFs, and 0 errors.
Attempted 333 of 3000: Found 61 finishers (18.32%), 272 DNFs, and 0 errors.
Attempted 334 of 3000: Found 61 finishers (18.26%), 273 DNFs, and 0 errors.
Attempted 335 of 3000: Found 61 finishers (18.21%), 274 DNFs, and 0 errors.
Attempted 336 of 3000: Found 62 finishers (18.45%), 274 DNFs, and 0 errors.
Attempted 337 of 3000: Found 62 finishers (18.40%), 275 DNFs, and 0 errors.
Attempted 338 of 3000: Found 62 finishers (18.34%), 276 DNFs, and 0 errors.
Attempted 339 of 3000: Found 62 finishers (18.29%), 277 DNFs, and 0 errors.
Attempted 340 of 3000: Found 62 finishers (18.24%), 278 DNFs, and 0 errors.
Attempted 341 of 3000: Found 62 finishers (18.18%), 279 DNFs, and 0 errors.
Attempted 34

Attempted 437 of 3000: Found 80 finishers (18.31%), 357 DNFs, and 0 errors.
Attempted 438 of 3000: Found 81 finishers (18.49%), 357 DNFs, and 0 errors.
Attempted 439 of 3000: Found 81 finishers (18.45%), 358 DNFs, and 0 errors.
Attempted 440 of 3000: Found 81 finishers (18.41%), 359 DNFs, and 0 errors.
Attempted 441 of 3000: Found 81 finishers (18.37%), 360 DNFs, and 0 errors.
Attempted 442 of 3000: Found 81 finishers (18.33%), 361 DNFs, and 0 errors.
Attempted 443 of 3000: Found 81 finishers (18.28%), 362 DNFs, and 0 errors.
Attempted 444 of 3000: Found 81 finishers (18.24%), 363 DNFs, and 0 errors.
Attempted 445 of 3000: Found 81 finishers (18.20%), 364 DNFs, and 0 errors.
Attempted 446 of 3000: Found 81 finishers (18.16%), 365 DNFs, and 0 errors.
Attempted 447 of 3000: Found 82 finishers (18.34%), 365 DNFs, and 0 errors.
Attempted 448 of 3000: Found 82 finishers (18.30%), 366 DNFs, and 0 errors.
Attempted 449 of 3000: Found 82 finishers (18.26%), 367 DNFs, and 0 errors.
Attempted 45

Attempted 545 of 3000: Found 102 finishers (18.72%), 443 DNFs, and 0 errors.
Attempted 546 of 3000: Found 103 finishers (18.86%), 443 DNFs, and 0 errors.
Attempted 547 of 3000: Found 103 finishers (18.83%), 444 DNFs, and 0 errors.
Attempted 548 of 3000: Found 103 finishers (18.80%), 445 DNFs, and 0 errors.
Attempted 549 of 3000: Found 104 finishers (18.94%), 445 DNFs, and 0 errors.
Attempted 550 of 3000: Found 104 finishers (18.91%), 446 DNFs, and 0 errors.
Attempted 551 of 3000: Found 104 finishers (18.87%), 447 DNFs, and 0 errors.
Attempted 552 of 3000: Found 104 finishers (18.84%), 448 DNFs, and 0 errors.
Attempted 553 of 3000: Found 104 finishers (18.81%), 449 DNFs, and 0 errors.
Attempted 554 of 3000: Found 104 finishers (18.77%), 450 DNFs, and 0 errors.
Attempted 555 of 3000: Found 104 finishers (18.74%), 451 DNFs, and 0 errors.
Attempted 556 of 3000: Found 104 finishers (18.71%), 452 DNFs, and 0 errors.
Attempted 557 of 3000: Found 104 finishers (18.67%), 453 DNFs, and 0 errors.

Attempted 652 of 3000: Found 123 finishers (18.87%), 529 DNFs, and 0 errors.
Attempted 653 of 3000: Found 123 finishers (18.84%), 530 DNFs, and 0 errors.
Attempted 654 of 3000: Found 123 finishers (18.81%), 531 DNFs, and 0 errors.
Attempted 655 of 3000: Found 123 finishers (18.78%), 532 DNFs, and 0 errors.
Attempted 656 of 3000: Found 123 finishers (18.75%), 533 DNFs, and 0 errors.
Attempted 657 of 3000: Found 123 finishers (18.72%), 534 DNFs, and 0 errors.
Attempted 658 of 3000: Found 123 finishers (18.69%), 535 DNFs, and 0 errors.
Attempted 659 of 3000: Found 123 finishers (18.66%), 536 DNFs, and 0 errors.
Attempted 660 of 3000: Found 123 finishers (18.64%), 537 DNFs, and 0 errors.
Attempted 661 of 3000: Found 123 finishers (18.61%), 538 DNFs, and 0 errors.
Attempted 662 of 3000: Found 123 finishers (18.58%), 539 DNFs, and 0 errors.
Attempted 663 of 3000: Found 123 finishers (18.55%), 540 DNFs, and 0 errors.
Attempted 664 of 3000: Found 123 finishers (18.52%), 541 DNFs, and 0 errors.

Attempted 759 of 3000: Found 138 finishers (18.18%), 621 DNFs, and 0 errors.
Attempted 760 of 3000: Found 138 finishers (18.16%), 622 DNFs, and 0 errors.
Attempted 761 of 3000: Found 138 finishers (18.13%), 623 DNFs, and 0 errors.
Attempted 762 of 3000: Found 138 finishers (18.11%), 624 DNFs, and 0 errors.
Attempted 763 of 3000: Found 139 finishers (18.22%), 624 DNFs, and 0 errors.
Attempted 764 of 3000: Found 139 finishers (18.19%), 625 DNFs, and 0 errors.
Attempted 765 of 3000: Found 139 finishers (18.17%), 626 DNFs, and 0 errors.
Attempted 766 of 3000: Found 140 finishers (18.28%), 626 DNFs, and 0 errors.
Attempted 767 of 3000: Found 140 finishers (18.25%), 627 DNFs, and 0 errors.
Attempted 768 of 3000: Found 140 finishers (18.23%), 628 DNFs, and 0 errors.
Attempted 769 of 3000: Found 141 finishers (18.34%), 628 DNFs, and 0 errors.
Attempted 770 of 3000: Found 141 finishers (18.31%), 629 DNFs, and 0 errors.
Attempted 771 of 3000: Found 141 finishers (18.29%), 630 DNFs, and 0 errors.

Attempted 866 of 3000: Found 158 finishers (18.24%), 708 DNFs, and 0 errors.
Attempted 867 of 3000: Found 158 finishers (18.22%), 709 DNFs, and 0 errors.
Attempted 868 of 3000: Found 158 finishers (18.20%), 710 DNFs, and 0 errors.
Attempted 869 of 3000: Found 158 finishers (18.18%), 711 DNFs, and 0 errors.
Attempted 870 of 3000: Found 158 finishers (18.16%), 712 DNFs, and 0 errors.
Attempted 871 of 3000: Found 158 finishers (18.14%), 713 DNFs, and 0 errors.
Attempted 872 of 3000: Found 159 finishers (18.23%), 713 DNFs, and 0 errors.
Attempted 873 of 3000: Found 159 finishers (18.21%), 714 DNFs, and 0 errors.
Attempted 874 of 3000: Found 159 finishers (18.19%), 715 DNFs, and 0 errors.
Attempted 875 of 3000: Found 160 finishers (18.29%), 715 DNFs, and 0 errors.
Attempted 876 of 3000: Found 161 finishers (18.38%), 715 DNFs, and 0 errors.
Attempted 877 of 3000: Found 162 finishers (18.47%), 715 DNFs, and 0 errors.
Attempted 878 of 3000: Found 163 finishers (18.56%), 715 DNFs, and 0 errors.

Attempted 973 of 3000: Found 184 finishers (18.91%), 789 DNFs, and 0 errors.
Attempted 974 of 3000: Found 184 finishers (18.89%), 790 DNFs, and 0 errors.
Attempted 975 of 3000: Found 184 finishers (18.87%), 791 DNFs, and 0 errors.
Attempted 976 of 3000: Found 184 finishers (18.85%), 792 DNFs, and 0 errors.
Attempted 977 of 3000: Found 184 finishers (18.83%), 793 DNFs, and 0 errors.
Attempted 978 of 3000: Found 185 finishers (18.92%), 793 DNFs, and 0 errors.
Attempted 979 of 3000: Found 185 finishers (18.90%), 794 DNFs, and 0 errors.
Attempted 980 of 3000: Found 185 finishers (18.88%), 795 DNFs, and 0 errors.
Attempted 981 of 3000: Found 185 finishers (18.86%), 796 DNFs, and 0 errors.
Attempted 982 of 3000: Found 185 finishers (18.84%), 797 DNFs, and 0 errors.
Attempted 983 of 3000: Found 185 finishers (18.82%), 798 DNFs, and 0 errors.
Attempted 984 of 3000: Found 185 finishers (18.80%), 799 DNFs, and 0 errors.
Attempted 985 of 3000: Found 186 finishers (18.88%), 799 DNFs, and 0 errors.

Attempted 1079 of 3000: Found 201 finishers (18.63%), 878 DNFs, and 0 errors.
Attempted 1080 of 3000: Found 202 finishers (18.70%), 878 DNFs, and 0 errors.
Attempted 1081 of 3000: Found 202 finishers (18.69%), 879 DNFs, and 0 errors.
Attempted 1082 of 3000: Found 202 finishers (18.67%), 880 DNFs, and 0 errors.
Attempted 1083 of 3000: Found 203 finishers (18.74%), 880 DNFs, and 0 errors.
Attempted 1084 of 3000: Found 203 finishers (18.73%), 881 DNFs, and 0 errors.
Attempted 1085 of 3000: Found 204 finishers (18.80%), 881 DNFs, and 0 errors.
Attempted 1086 of 3000: Found 204 finishers (18.78%), 882 DNFs, and 0 errors.
Attempted 1087 of 3000: Found 204 finishers (18.77%), 883 DNFs, and 0 errors.
Attempted 1088 of 3000: Found 204 finishers (18.75%), 884 DNFs, and 0 errors.
Attempted 1089 of 3000: Found 204 finishers (18.73%), 885 DNFs, and 0 errors.
Attempted 1090 of 3000: Found 204 finishers (18.72%), 886 DNFs, and 0 errors.
Attempted 1091 of 3000: Found 204 finishers (18.70%), 887 DNFs, 

Attempted 1185 of 3000: Found 222 finishers (18.73%), 963 DNFs, and 0 errors.
Attempted 1186 of 3000: Found 223 finishers (18.80%), 963 DNFs, and 0 errors.
Attempted 1187 of 3000: Found 223 finishers (18.79%), 964 DNFs, and 0 errors.
Attempted 1188 of 3000: Found 223 finishers (18.77%), 965 DNFs, and 0 errors.
Attempted 1189 of 3000: Found 223 finishers (18.76%), 966 DNFs, and 0 errors.
Attempted 1190 of 3000: Found 223 finishers (18.74%), 967 DNFs, and 0 errors.
Attempted 1191 of 3000: Found 223 finishers (18.72%), 968 DNFs, and 0 errors.
Attempted 1192 of 3000: Found 223 finishers (18.71%), 969 DNFs, and 0 errors.
Attempted 1193 of 3000: Found 223 finishers (18.69%), 970 DNFs, and 0 errors.
Attempted 1194 of 3000: Found 223 finishers (18.68%), 971 DNFs, and 0 errors.
Attempted 1195 of 3000: Found 224 finishers (18.74%), 971 DNFs, and 0 errors.
Attempted 1196 of 3000: Found 224 finishers (18.73%), 972 DNFs, and 0 errors.
Attempted 1197 of 3000: Found 224 finishers (18.71%), 973 DNFs, 

Attempted 1290 of 3000: Found 243 finishers (18.84%), 1047 DNFs, and 0 errors.
Attempted 1291 of 3000: Found 243 finishers (18.82%), 1048 DNFs, and 0 errors.
Attempted 1292 of 3000: Found 244 finishers (18.89%), 1048 DNFs, and 0 errors.
Attempted 1293 of 3000: Found 244 finishers (18.87%), 1049 DNFs, and 0 errors.
Attempted 1294 of 3000: Found 245 finishers (18.93%), 1049 DNFs, and 0 errors.
Attempted 1295 of 3000: Found 245 finishers (18.92%), 1050 DNFs, and 0 errors.
Attempted 1296 of 3000: Found 245 finishers (18.90%), 1051 DNFs, and 0 errors.
Attempted 1297 of 3000: Found 245 finishers (18.89%), 1052 DNFs, and 0 errors.
Attempted 1298 of 3000: Found 245 finishers (18.88%), 1053 DNFs, and 0 errors.
Attempted 1299 of 3000: Found 245 finishers (18.86%), 1054 DNFs, and 0 errors.
Attempted 1300 of 3000: Found 245 finishers (18.85%), 1055 DNFs, and 0 errors.
Attempted 1301 of 3000: Found 245 finishers (18.83%), 1056 DNFs, and 0 errors.
Attempted 1302 of 3000: Found 245 finishers (18.82%)

Attempted 1394 of 3000: Found 264 finishers (18.94%), 1130 DNFs, and 0 errors.
Attempted 1395 of 3000: Found 264 finishers (18.92%), 1131 DNFs, and 0 errors.
Attempted 1396 of 3000: Found 264 finishers (18.91%), 1132 DNFs, and 0 errors.
Attempted 1397 of 3000: Found 264 finishers (18.90%), 1133 DNFs, and 0 errors.
Attempted 1398 of 3000: Found 264 finishers (18.88%), 1134 DNFs, and 0 errors.
Attempted 1399 of 3000: Found 264 finishers (18.87%), 1135 DNFs, and 0 errors.
Attempted 1400 of 3000: Found 264 finishers (18.86%), 1136 DNFs, and 0 errors.
Attempted 1401 of 3000: Found 264 finishers (18.84%), 1137 DNFs, and 0 errors.
Attempted 1402 of 3000: Found 264 finishers (18.83%), 1138 DNFs, and 0 errors.
Attempted 1403 of 3000: Found 264 finishers (18.82%), 1139 DNFs, and 0 errors.
Attempted 1404 of 3000: Found 264 finishers (18.80%), 1140 DNFs, and 0 errors.
Attempted 1405 of 3000: Found 264 finishers (18.79%), 1141 DNFs, and 0 errors.
Attempted 1406 of 3000: Found 264 finishers (18.78%)

Attempted 1498 of 3000: Found 285 finishers (19.03%), 1213 DNFs, and 0 errors.
Attempted 1499 of 3000: Found 285 finishers (19.01%), 1214 DNFs, and 0 errors.
Attempted 1500 of 3000: Found 285 finishers (19.00%), 1215 DNFs, and 0 errors.
Attempted 1501 of 3000: Found 285 finishers (18.99%), 1216 DNFs, and 0 errors.
Attempted 1502 of 3000: Found 285 finishers (18.97%), 1217 DNFs, and 0 errors.
Attempted 1503 of 3000: Found 285 finishers (18.96%), 1218 DNFs, and 0 errors.
Attempted 1504 of 3000: Found 285 finishers (18.95%), 1219 DNFs, and 0 errors.
Attempted 1505 of 3000: Found 285 finishers (18.94%), 1220 DNFs, and 0 errors.
Attempted 1506 of 3000: Found 286 finishers (18.99%), 1220 DNFs, and 0 errors.
Attempted 1507 of 3000: Found 287 finishers (19.04%), 1220 DNFs, and 0 errors.
Attempted 1508 of 3000: Found 287 finishers (19.03%), 1221 DNFs, and 0 errors.
Attempted 1509 of 3000: Found 287 finishers (19.02%), 1222 DNFs, and 0 errors.
Attempted 1510 of 3000: Found 287 finishers (19.01%)

Attempted 1602 of 3000: Found 305 finishers (19.04%), 1297 DNFs, and 0 errors.
Attempted 1603 of 3000: Found 305 finishers (19.03%), 1298 DNFs, and 0 errors.
Attempted 1604 of 3000: Found 305 finishers (19.01%), 1299 DNFs, and 0 errors.
Attempted 1605 of 3000: Found 305 finishers (19.00%), 1300 DNFs, and 0 errors.
Attempted 1606 of 3000: Found 305 finishers (18.99%), 1301 DNFs, and 0 errors.
Attempted 1607 of 3000: Found 305 finishers (18.98%), 1302 DNFs, and 0 errors.
Attempted 1608 of 3000: Found 306 finishers (19.03%), 1302 DNFs, and 0 errors.
Attempted 1609 of 3000: Found 306 finishers (19.02%), 1303 DNFs, and 0 errors.
Attempted 1610 of 3000: Found 306 finishers (19.01%), 1304 DNFs, and 0 errors.
Attempted 1611 of 3000: Found 306 finishers (18.99%), 1305 DNFs, and 0 errors.
Attempted 1612 of 3000: Found 306 finishers (18.98%), 1306 DNFs, and 0 errors.
Attempted 1613 of 3000: Found 306 finishers (18.97%), 1307 DNFs, and 0 errors.
Attempted 1614 of 3000: Found 306 finishers (18.96%)

Attempted 1706 of 3000: Found 320 finishers (18.76%), 1386 DNFs, and 0 errors.
Attempted 1707 of 3000: Found 320 finishers (18.75%), 1387 DNFs, and 0 errors.
Attempted 1708 of 3000: Found 320 finishers (18.74%), 1388 DNFs, and 0 errors.
Attempted 1709 of 3000: Found 321 finishers (18.78%), 1388 DNFs, and 0 errors.
Attempted 1710 of 3000: Found 321 finishers (18.77%), 1389 DNFs, and 0 errors.
Attempted 1711 of 3000: Found 321 finishers (18.76%), 1390 DNFs, and 0 errors.
Attempted 1712 of 3000: Found 321 finishers (18.75%), 1391 DNFs, and 0 errors.
Attempted 1713 of 3000: Found 322 finishers (18.80%), 1391 DNFs, and 0 errors.
Attempted 1714 of 3000: Found 322 finishers (18.79%), 1392 DNFs, and 0 errors.
Attempted 1715 of 3000: Found 322 finishers (18.78%), 1393 DNFs, and 0 errors.
Attempted 1716 of 3000: Found 322 finishers (18.76%), 1394 DNFs, and 0 errors.
Attempted 1717 of 3000: Found 322 finishers (18.75%), 1395 DNFs, and 0 errors.
Attempted 1718 of 3000: Found 322 finishers (18.74%)

Attempted 1810 of 3000: Found 337 finishers (18.62%), 1473 DNFs, and 0 errors.
Attempted 1811 of 3000: Found 337 finishers (18.61%), 1474 DNFs, and 0 errors.
Attempted 1812 of 3000: Found 337 finishers (18.60%), 1475 DNFs, and 0 errors.
Attempted 1813 of 3000: Found 338 finishers (18.64%), 1475 DNFs, and 0 errors.
Attempted 1814 of 3000: Found 338 finishers (18.63%), 1476 DNFs, and 0 errors.
Attempted 1815 of 3000: Found 338 finishers (18.62%), 1477 DNFs, and 0 errors.
Attempted 1816 of 3000: Found 338 finishers (18.61%), 1478 DNFs, and 0 errors.
Attempted 1817 of 3000: Found 339 finishers (18.66%), 1478 DNFs, and 0 errors.
Attempted 1818 of 3000: Found 340 finishers (18.70%), 1478 DNFs, and 0 errors.
Attempted 1819 of 3000: Found 340 finishers (18.69%), 1479 DNFs, and 0 errors.
Attempted 1820 of 3000: Found 340 finishers (18.68%), 1480 DNFs, and 0 errors.
Attempted 1821 of 3000: Found 340 finishers (18.67%), 1481 DNFs, and 0 errors.
Attempted 1822 of 3000: Found 340 finishers (18.66%)

Attempted 1914 of 3000: Found 357 finishers (18.65%), 1557 DNFs, and 0 errors.
Attempted 1915 of 3000: Found 357 finishers (18.64%), 1558 DNFs, and 0 errors.
Attempted 1916 of 3000: Found 358 finishers (18.68%), 1558 DNFs, and 0 errors.
Attempted 1917 of 3000: Found 358 finishers (18.68%), 1559 DNFs, and 0 errors.
Attempted 1918 of 3000: Found 358 finishers (18.67%), 1560 DNFs, and 0 errors.
Attempted 1919 of 3000: Found 358 finishers (18.66%), 1561 DNFs, and 0 errors.
Attempted 1920 of 3000: Found 358 finishers (18.65%), 1562 DNFs, and 0 errors.
Attempted 1921 of 3000: Found 358 finishers (18.64%), 1563 DNFs, and 0 errors.
Attempted 1922 of 3000: Found 358 finishers (18.63%), 1564 DNFs, and 0 errors.
Attempted 1923 of 3000: Found 358 finishers (18.62%), 1565 DNFs, and 0 errors.
Attempted 1924 of 3000: Found 358 finishers (18.61%), 1566 DNFs, and 0 errors.
Attempted 1925 of 3000: Found 359 finishers (18.65%), 1566 DNFs, and 0 errors.
Attempted 1926 of 3000: Found 359 finishers (18.64%)

Attempted 2018 of 3000: Found 375 finishers (18.58%), 1643 DNFs, and 0 errors.
Attempted 2019 of 3000: Found 376 finishers (18.62%), 1643 DNFs, and 0 errors.
Attempted 2020 of 3000: Found 376 finishers (18.61%), 1644 DNFs, and 0 errors.
Attempted 2021 of 3000: Found 376 finishers (18.60%), 1645 DNFs, and 0 errors.
Attempted 2022 of 3000: Found 376 finishers (18.60%), 1646 DNFs, and 0 errors.
Attempted 2023 of 3000: Found 376 finishers (18.59%), 1647 DNFs, and 0 errors.
Attempted 2024 of 3000: Found 376 finishers (18.58%), 1648 DNFs, and 0 errors.
Attempted 2025 of 3000: Found 377 finishers (18.62%), 1648 DNFs, and 0 errors.
Attempted 2026 of 3000: Found 377 finishers (18.61%), 1649 DNFs, and 0 errors.
Attempted 2027 of 3000: Found 377 finishers (18.60%), 1650 DNFs, and 0 errors.
Attempted 2028 of 3000: Found 377 finishers (18.59%), 1651 DNFs, and 0 errors.
Attempted 2029 of 3000: Found 378 finishers (18.63%), 1651 DNFs, and 0 errors.
Attempted 2030 of 3000: Found 379 finishers (18.67%)

Attempted 2122 of 3000: Found 393 finishers (18.52%), 1729 DNFs, and 0 errors.
Attempted 2123 of 3000: Found 393 finishers (18.51%), 1730 DNFs, and 0 errors.
Attempted 2124 of 3000: Found 393 finishers (18.50%), 1731 DNFs, and 0 errors.
Attempted 2125 of 3000: Found 393 finishers (18.49%), 1732 DNFs, and 0 errors.
Attempted 2126 of 3000: Found 394 finishers (18.53%), 1732 DNFs, and 0 errors.
Attempted 2127 of 3000: Found 394 finishers (18.52%), 1733 DNFs, and 0 errors.
Attempted 2128 of 3000: Found 394 finishers (18.52%), 1734 DNFs, and 0 errors.
Attempted 2129 of 3000: Found 394 finishers (18.51%), 1735 DNFs, and 0 errors.
Attempted 2130 of 3000: Found 394 finishers (18.50%), 1736 DNFs, and 0 errors.
Attempted 2131 of 3000: Found 394 finishers (18.49%), 1737 DNFs, and 0 errors.
Attempted 2132 of 3000: Found 394 finishers (18.48%), 1738 DNFs, and 0 errors.
Attempted 2133 of 3000: Found 394 finishers (18.47%), 1739 DNFs, and 0 errors.
Attempted 2134 of 3000: Found 394 finishers (18.46%)

Attempted 2226 of 3000: Found 411 finishers (18.46%), 1814 DNFs, and 1 errors.
Attempted 2227 of 3000: Found 411 finishers (18.46%), 1815 DNFs, and 1 errors.
Attempted 2228 of 3000: Found 411 finishers (18.45%), 1816 DNFs, and 1 errors.
Attempted 2229 of 3000: Found 412 finishers (18.48%), 1816 DNFs, and 1 errors.
Attempted 2230 of 3000: Found 413 finishers (18.52%), 1816 DNFs, and 1 errors.
Attempted 2231 of 3000: Found 413 finishers (18.51%), 1817 DNFs, and 1 errors.
Attempted 2232 of 3000: Found 413 finishers (18.50%), 1818 DNFs, and 1 errors.
Attempted 2233 of 3000: Found 413 finishers (18.50%), 1819 DNFs, and 1 errors.
Attempted 2234 of 3000: Found 413 finishers (18.49%), 1820 DNFs, and 1 errors.
Attempted 2235 of 3000: Found 413 finishers (18.48%), 1821 DNFs, and 1 errors.
Attempted 2236 of 3000: Found 413 finishers (18.47%), 1822 DNFs, and 1 errors.
Attempted 2237 of 3000: Found 413 finishers (18.46%), 1823 DNFs, and 1 errors.
Attempted 2238 of 3000: Found 413 finishers (18.45%)

Attempted 2330 of 3000: Found 431 finishers (18.50%), 1898 DNFs, and 1 errors.
Attempted 2331 of 3000: Found 432 finishers (18.53%), 1898 DNFs, and 1 errors.
Attempted 2332 of 3000: Found 432 finishers (18.52%), 1899 DNFs, and 1 errors.
Attempted 2333 of 3000: Found 432 finishers (18.52%), 1900 DNFs, and 1 errors.
Attempted 2334 of 3000: Found 432 finishers (18.51%), 1901 DNFs, and 1 errors.
Attempted 2335 of 3000: Found 432 finishers (18.50%), 1902 DNFs, and 1 errors.
Attempted 2336 of 3000: Found 432 finishers (18.49%), 1903 DNFs, and 1 errors.
Attempted 2337 of 3000: Found 432 finishers (18.49%), 1904 DNFs, and 1 errors.
Attempted 2338 of 3000: Found 433 finishers (18.52%), 1904 DNFs, and 1 errors.
Attempted 2339 of 3000: Found 433 finishers (18.51%), 1905 DNFs, and 1 errors.
Attempted 2340 of 3000: Found 433 finishers (18.50%), 1906 DNFs, and 1 errors.
Attempted 2341 of 3000: Found 434 finishers (18.54%), 1906 DNFs, and 1 errors.
Attempted 2342 of 3000: Found 434 finishers (18.53%)

Attempted 2434 of 3000: Found 458 finishers (18.82%), 1975 DNFs, and 1 errors.
Attempted 2435 of 3000: Found 459 finishers (18.85%), 1975 DNFs, and 1 errors.
Attempted 2436 of 3000: Found 459 finishers (18.84%), 1976 DNFs, and 1 errors.
Attempted 2437 of 3000: Found 459 finishers (18.83%), 1977 DNFs, and 1 errors.
Attempted 2438 of 3000: Found 460 finishers (18.87%), 1977 DNFs, and 1 errors.
Attempted 2439 of 3000: Found 460 finishers (18.86%), 1978 DNFs, and 1 errors.
Attempted 2440 of 3000: Found 460 finishers (18.85%), 1979 DNFs, and 1 errors.
Attempted 2441 of 3000: Found 460 finishers (18.84%), 1980 DNFs, and 1 errors.
Attempted 2442 of 3000: Found 460 finishers (18.84%), 1981 DNFs, and 1 errors.
Attempted 2443 of 3000: Found 460 finishers (18.83%), 1982 DNFs, and 1 errors.
Attempted 2444 of 3000: Found 460 finishers (18.82%), 1983 DNFs, and 1 errors.
Attempted 2445 of 3000: Found 461 finishers (18.85%), 1983 DNFs, and 1 errors.
Attempted 2446 of 3000: Found 461 finishers (18.85%)

Attempted 2538 of 3000: Found 480 finishers (18.91%), 2057 DNFs, and 1 errors.
Attempted 2539 of 3000: Found 480 finishers (18.91%), 2058 DNFs, and 1 errors.
Attempted 2540 of 3000: Found 480 finishers (18.90%), 2059 DNFs, and 1 errors.
Attempted 2541 of 3000: Found 481 finishers (18.93%), 2059 DNFs, and 1 errors.
Attempted 2542 of 3000: Found 481 finishers (18.92%), 2060 DNFs, and 1 errors.
Attempted 2543 of 3000: Found 482 finishers (18.95%), 2060 DNFs, and 1 errors.
Attempted 2544 of 3000: Found 482 finishers (18.95%), 2061 DNFs, and 1 errors.
Attempted 2545 of 3000: Found 482 finishers (18.94%), 2062 DNFs, and 1 errors.
Attempted 2546 of 3000: Found 482 finishers (18.93%), 2063 DNFs, and 1 errors.
Attempted 2547 of 3000: Found 483 finishers (18.96%), 2063 DNFs, and 1 errors.
Attempted 2548 of 3000: Found 483 finishers (18.96%), 2064 DNFs, and 1 errors.
Attempted 2549 of 3000: Found 484 finishers (18.99%), 2064 DNFs, and 1 errors.
Attempted 2550 of 3000: Found 485 finishers (19.02%)

Attempted 2642 of 3000: Found 511 finishers (19.34%), 2130 DNFs, and 1 errors.
Attempted 2643 of 3000: Found 511 finishers (19.33%), 2131 DNFs, and 1 errors.
Attempted 2644 of 3000: Found 511 finishers (19.33%), 2132 DNFs, and 1 errors.
Attempted 2645 of 3000: Found 511 finishers (19.32%), 2133 DNFs, and 1 errors.
Attempted 2646 of 3000: Found 511 finishers (19.31%), 2134 DNFs, and 1 errors.
Attempted 2647 of 3000: Found 511 finishers (19.30%), 2135 DNFs, and 1 errors.
Attempted 2648 of 3000: Found 511 finishers (19.30%), 2136 DNFs, and 1 errors.
Attempted 2649 of 3000: Found 511 finishers (19.29%), 2137 DNFs, and 1 errors.
Attempted 2650 of 3000: Found 511 finishers (19.28%), 2138 DNFs, and 1 errors.
Attempted 2651 of 3000: Found 511 finishers (19.28%), 2139 DNFs, and 1 errors.
Attempted 2652 of 3000: Found 511 finishers (19.27%), 2140 DNFs, and 1 errors.
Attempted 2653 of 3000: Found 512 finishers (19.30%), 2140 DNFs, and 1 errors.
Attempted 2654 of 3000: Found 513 finishers (19.33%)

Attempted 2746 of 3000: Found 533 finishers (19.41%), 2211 DNFs, and 2 errors.
Attempted 2747 of 3000: Found 534 finishers (19.44%), 2211 DNFs, and 2 errors.
Attempted 2748 of 3000: Found 534 finishers (19.43%), 2212 DNFs, and 2 errors.
Attempted 2749 of 3000: Found 534 finishers (19.43%), 2213 DNFs, and 2 errors.
Attempted 2750 of 3000: Found 534 finishers (19.42%), 2214 DNFs, and 2 errors.
Attempted 2751 of 3000: Found 534 finishers (19.41%), 2215 DNFs, and 2 errors.
Attempted 2752 of 3000: Found 534 finishers (19.40%), 2216 DNFs, and 2 errors.
Attempted 2753 of 3000: Found 534 finishers (19.40%), 2217 DNFs, and 2 errors.
Attempted 2754 of 3000: Found 534 finishers (19.39%), 2218 DNFs, and 2 errors.
Attempted 2755 of 3000: Found 535 finishers (19.42%), 2218 DNFs, and 2 errors.
Attempted 2756 of 3000: Found 536 finishers (19.45%), 2218 DNFs, and 2 errors.
Attempted 2757 of 3000: Found 536 finishers (19.44%), 2219 DNFs, and 2 errors.
Attempted 2758 of 3000: Found 536 finishers (19.43%)

Attempted 2850 of 3000: Found 559 finishers (19.61%), 2289 DNFs, and 2 errors.
Attempted 2851 of 3000: Found 559 finishers (19.61%), 2290 DNFs, and 2 errors.
Attempted 2852 of 3000: Found 559 finishers (19.60%), 2291 DNFs, and 2 errors.
Attempted 2853 of 3000: Found 559 finishers (19.59%), 2292 DNFs, and 2 errors.
Attempted 2854 of 3000: Found 560 finishers (19.62%), 2292 DNFs, and 2 errors.
Attempted 2855 of 3000: Found 560 finishers (19.61%), 2293 DNFs, and 2 errors.
Attempted 2856 of 3000: Found 560 finishers (19.61%), 2294 DNFs, and 2 errors.
Attempted 2857 of 3000: Found 560 finishers (19.60%), 2295 DNFs, and 2 errors.
Attempted 2858 of 3000: Found 560 finishers (19.59%), 2296 DNFs, and 2 errors.
Attempted 2859 of 3000: Found 560 finishers (19.59%), 2297 DNFs, and 2 errors.
Attempted 2860 of 3000: Found 561 finishers (19.62%), 2297 DNFs, and 2 errors.
Attempted 2861 of 3000: Found 561 finishers (19.61%), 2298 DNFs, and 2 errors.
Attempted 2862 of 3000: Found 561 finishers (19.60%)

Attempted 2954 of 3000: Found 574 finishers (19.43%), 2378 DNFs, and 2 errors.
Attempted 2955 of 3000: Found 574 finishers (19.42%), 2379 DNFs, and 2 errors.
Attempted 2956 of 3000: Found 574 finishers (19.42%), 2380 DNFs, and 2 errors.
Attempted 2957 of 3000: Found 574 finishers (19.41%), 2381 DNFs, and 2 errors.
Attempted 2958 of 3000: Found 574 finishers (19.41%), 2382 DNFs, and 2 errors.
Attempted 2959 of 3000: Found 574 finishers (19.40%), 2383 DNFs, and 2 errors.
Attempted 2960 of 3000: Found 574 finishers (19.39%), 2384 DNFs, and 2 errors.
Attempted 2961 of 3000: Found 574 finishers (19.39%), 2385 DNFs, and 2 errors.
Attempted 2962 of 3000: Found 575 finishers (19.41%), 2385 DNFs, and 2 errors.
Attempted 2963 of 3000: Found 575 finishers (19.41%), 2386 DNFs, and 2 errors.
Attempted 2964 of 3000: Found 575 finishers (19.40%), 2387 DNFs, and 2 errors.
Attempted 2965 of 3000: Found 575 finishers (19.39%), 2388 DNFs, and 2 errors.
Attempted 2966 of 3000: Found 575 finishers (19.39%)

# EDA

### Convert scraped data to dataframe

In [9]:
# Collect the scraped results into a single DataFrame.
# NOTE(review): `runners` is built earlier in the notebook (outside this section);
# presumably a list of per-finisher dicts -- confirm against the scraping loop.
finishers_df = pd.DataFrame(runners)

In [10]:
# Print the full per-column summary (non-null counts and dtypes) to sanity-check the scrape.
finishers_df.info(verbose = True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 582 entries, 0 to 581
Data columns (total 54 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   name                   582 non-null    object
 1   geo_subregion          582 non-null    object
 2   country                582 non-null    object
 3   gender                 582 non-null    object
 4   age                    582 non-null    object
 5   bib                    582 non-null    object
 6   official_time          582 non-null    object
 7   pace_per_mile          582 non-null    object
 8   place_overall          582 non-null    object
 9   place_gender           582 non-null    object
 10  age_group              582 non-null    object
 11  place_age-group        582 non-null    object
 12  country_group          582 non-null    object
 13  place_country          582 non-null    object
 14  place_age‐graded       582 non-null    object
 15  time_age‐graded        

In [11]:
# Display the DataFrame; the notebook renders a truncated preview of its rows and columns.
finishers_df

Unnamed: 0,name,geo_subregion,country,gender,age,bib,official_time,pace_per_mile,place_overall,place_gender,...,21m,35k,22m,23m,24m,40k,25m,26m,mar,team
0,Dina Castellanos,Chetumal,MEX,F,61,72385,7:11:12,16:27,52661,22182,...,5:41:49,5:54:05,5:58:18,6:14:50,6:32:44,6:47:24,6:50:01,7:07:24,7:11:12,
1,Rajesh Sunder,"Lutz, FL",USA,M,44,72665,6:11:36,14:11,49879,29372,...,4:53:51,5:05:12,5:08:59,5:24:18,5:39:41,5:52:38,5:54:40,6:08:52,6:11:36,
2,Cynthia Marfori-Gordon,"New York, NY",USA,F,57,72073,7:19:00,16:45,52824,22277,...,5:55:08,6:08:34,6:12:36,,6:45:06,6:58:05,7:00:22,7:15:34,7:19:00,New York Walkers Club
3,Nichol Hare,"New York, NY",USA,F,40,72512,6:28:59,14:51,51068,21248,...,5:10:56,5:22:28,5:25:55,5:42:28,5:57:23,6:09:21,6:11:45,6:25:51,6:28:59,
4,Sarath Challa,"New Milford, NJ",USA,M,35,72444,7:30:37,17:12,53031,30620,...,5:42:02,5:56:48,6:01:44,,6:41:41,6:59:38,7:03:03,7:25:41,7:30:37,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
577,Iain Craig,Auckland,NZL,M,55,72531,5:56:45,13:37,48458,28762,...,4:45:51,4:55:04,4:58:34,5:11:26,5:25:57,5:38:22,5:40:06,5:53:49,5:56:45,
578,Danny Mota,"Brooklyn, NY",USA,M,41,72047,6:33:56,15:02,51339,29948,...,5:05:45,5:17:30,5:21:47,5:39:31,5:59:04,6:12:02,6:14:46,6:31:02,6:33:56,
579,Johnine Barnes,"Washington, DC",USA,F,49,72117,6:31:22,14:56,51203,21318,...,5:05:54,5:18:12,5:22:37,5:40:30,5:58:01,6:11:13,6:13:33,6:28:13,6:31:22,Black Girls Run
580,Julie Romain,"Buffalo, NY",USA,F,50,72521,6:49:27,15:37,51998,21759,...,5:20:40,5:33:33,5:37:56,5:55:01,6:12:19,6:26:58,6:29:28,6:46:02,6:49:27,


# Store Data

### Write finishers to csv

In [12]:
def df_to_csv_writer(target_df, destination_file):
    """Append a DataFrame to a CSV file, creating the file if necessary.

    If ``destination_file`` already starts with a header row, the rows are
    written in that column order without repeating the header: header columns
    that ``target_df`` lacks are written as empty strings, and columns of
    ``target_df`` that are not in the header are left out (a warning is
    printed, because that data is not saved). If the file is missing or empty,
    the frame is written with its own header.

    The caller's DataFrame is never modified.

    Args:
        target_df: pandas DataFrame holding the rows to write.
        destination_file: path of the CSV file to create or append to.
    """
    import csv

    # We want to match the order of columns if we are appending new entries to an existing file.
    ### Because we are using a dictionary to capture the race results, and because of the way
    ### dictionaries are ordered, it is possible for different scraping runs to attempt to order
    ### the columns differently.
    ### That would be confusing, so we force whatever order was established in the first run.
    ### (The bottom line is that it would really be a good idea to use a database here instead.)

    # Try to read the header from the existing file. If it exists, match the column order.
    header_list = None
    try:
        with open(destination_file, 'r', encoding="utf-8") as f:
            # An empty file has no header row yet; next() then returns None and
            # the file is treated like a brand new one.
            header_list = next(csv.reader(f), None)
    # If the file is not there, we are going to create one from scratch.
    except FileNotFoundError:
        print(f"{destination_file} does not exist, so I'll create it!")

    if header_list:
        known_columns = set(header_list)
        dropped = [col for col in target_df.columns if col not in known_columns]
        if dropped:
            print(f"Warning: columns {dropped} are not in the header of "
                  f"{destination_file} and will not be written.")
        # reindex returns a new frame: missing columns are added as empty
        # strings and the order follows the header, without touching the
        # DataFrame the caller passed in.
        target_df = target_df.reindex(columns=header_list, fill_value='')

    # This either creates the file, or appends to it without headers. We already took steps to make sure
    # the columns in target_df match the headers (if they exist).
    with open(destination_file, 'a', encoding="utf-8") as f:
        write_header = f.tell() == 0
        # pandas 1.5 renamed `line_terminator` to `lineterminator` and pandas 2.0
        # removed the old spelling. Try the current keyword first and fall back
        # for older installations; the TypeError for an unknown keyword is
        # raised before anything is written.
        try:
            target_df.to_csv(f, header=write_header, index=False, lineterminator='\n')
        except TypeError:
            target_df.to_csv(f, header=write_header, index=False, line_terminator='\n')

In [13]:
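# NOTE: This cell is an earlier inline version of df_to_csv_writer() (defined in the cell above).
# It is kept commented out for reference only.
# import csv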
# output_csv = "{}{}_{}.csv".format(output_csv_path, race_code, output_csv_name_base)

# # We want to match the order of columns if we are appending new entries to an existing file.
# ### Because we are using a dictionary to capture the race results, and because of the way dictionaries are ordered,
# ### it is possible for different scraping runs to attempt to order the columns differently.
# ### That would be confusing, so we force whatever order was established in the first run.
# ### (The bottom line is that it would really be a good idea to use a database here instead.)

# # Try to read the header from the existing file. If they exist, match the column order.
# try:
#     with open(output_csv, 'r', encoding="utf-8") as f:
#         reader = csv.reader(f)
#         header_list = next(reader)
#         # It is possible to be missing columns, so check to see if we have everything.
#         for x in header_list:
#             if x not in finishers_df.columns:
#                 finishers_df[x] = ''
#         finishers_df = finishers_df[header_list]
# # If we weren't able to open the file, we are going to create one from scratch.
# except:
#     print(f"{output_csv} does not exist, so I'll create it!")

# # This either creates the file, or appends to it without headers. We already took steps to make sure
# # the columns in finishers_df match the headers (if they exist).
# with open(output_csv, 'a', encoding="utf-8") as f:
#     finishers_df.to_csv(f, header=f.tell()==0, index = False, line_terminator = '\n')

In [14]:
# Build the destination for the finishers CSV: <output dir><race code>_<base name>.csv
output_csv = f"{output_csv_path}{race_code}_{output_csv_name_base}.csv"

# Append this run's finishers to that file (the writer creates it on first use).
df_to_csv_writer(finishers_df, output_csv)

### Write scraping log to csv

In [15]:
# Collect the scraping log into a DataFrame and work out where its CSV lives.
log_df = pd.DataFrame(scraping_log)
log_csv = "{}{}_{}.csv".format(log_csv_path, race_code, log_csv_name_base)

# if the file does not exist, create it.
# if the file does exist, append to it without headers.
with open(log_csv, 'a', encoding="utf-8") as f:
    # In append mode the position is 0 only for a new (or empty) file.
    write_header = f.tell() == 0
    # pandas 1.5 renamed `line_terminator` to `lineterminator` and pandas 2.0
    # removed the old spelling. Try the current keyword first and fall back for
    # older installations; the TypeError for an unknown keyword is raised
    # before anything is written.
    try:
        log_df.to_csv(f, header=write_header, lineterminator='\n')
    except TypeError:
        log_df.to_csv(f, header=write_header, line_terminator='\n')