In [1]:
from playwright.async_api import async_playwright
import asyncio
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Column layouts for the per-position game-log tables. Each layout starts with
# the three game-identifying columns and ends with "playerName", which is the
# value appended to every scraped row further down.
qb_df_column_names = [
    "Date", "Opponent", "Result",
    # passing
    "CMP", "ATT", "PASS_YDS", "CMP%", "PASS_AVG", "PASS_TD", "INT", "PASS_LNG", "SACK", "RTG", "QBR",
    # rushing
    "RUSH_CAR", "RUSH_YDS", "RUSH_AVG", "RUSH_TD", "LNG_RUSH",
    "playerName",
]
rb_df_column_names = [
    "Date", "Opponent", "Result",
    # rushing
    "CAR", "RUSH_YDS", "RUSH_AVG", "RUSH_TD", "RUSH_LNG",
    # receiving
    "REC", "TGTS", "REC_YDS", "REC_AVG", "REC_TD", "LNG_REC",
    # remaining columns
    "FUM", "LST", "FF", "KB",
    "playerName",
]
wrTE_df_column_names = [
    "Date", "Opponent", "Result",
    # receiving
    "REC", "TGTS", "REC_YDS", "REC_AVG", "REC_TD", "LNG_REC",
    # rushing
    "CAR", "RUSH_YDS", "RUSH_AVG", "LNG_RUSH", "RUSH_TD",
    # remaining columns
    "FUM", "LST", "FF", "KB",
    "playerName",
]
P_df_column_names = [
    "Date", "Opponent", "Result",
    # punting
    "PUNTS", "PUNT_AVG", "LNG_PUNT", "PUNT_YDS", "TB", "TB%", "IN20", "IN20%",
    # returns against
    "RETURN_ATT", "RETURN_YDS", "RETURN_AVG", "NET_AVERAGE_PUNT_YARDS",
    "playerName",
]

# Parallel accumulators filled while scraping: entry i of each list describes
# the same player (display name, profile link, position label).
player_names = []
href_links = []
player_positions = []

# No rows yet: the position frames start empty and only carry their columns.
empty_data_list = []

# One empty accumulator frame per layout (QB, RB, WR/TE, P).
all_qb_df, all_rb_df, all_wrTE_df, all_P_df = (
    pd.DataFrame(empty_data_list, columns=layout)
    for layout in (
        qb_df_column_names,
        rb_df_column_names,
        wrTE_df_column_names,
        P_df_column_names,
    )
)


# This allows our window to only exist as long as the session
async with async_playwright() as p:
    # Create a 5 second delay for page loading.
    delay=2
    # Create instance of browser
    browser = await p.chromium.launch(headless=False, slow_mo=50)

    PRW_LINKS = ['', '/_/stat/rushing', '/_/stat/receiving']

    for PRW_Link in PRW_LINKS:
        # Create new page of browser
        page = await browser.new_page()
        # Go to espn to find passer stats (Need to add RB and WR option)
        await page.goto('https://www.espn.com/nfl/stats/player' + PRW_Link)
        # Delay
        await asyncio.sleep(delay)
        # Click "Show More" to see all players
        await page.locator('text=Show More').click()
        # Delay
        await asyncio.sleep(delay)
        # Pull html to pull info from.
        html = await page.inner_html('#fittPageContainer')
    
        # Create BeautifulSoup from html
        soup = BeautifulSoup(html, "html.parser")
    
        # Pull a subsection of html for positional/data rows. This helps subsequent calls for tags.
        catered_soup = soup.find('div', class_='Table__ScrollerWrapper relative overflow-hidden')
        catered_soup_Part2 = catered_soup.find_all('tr', class_='Table__TR Table__TR--sm Table__even')
    
        # Run through all positional/data rows and append positions to list player_positions.
        for position in catered_soup_Part2:
            Position_Text = position.find('td', class_='position Table__TD').text
            player_positions.append(Position_Text)
    
        # Pull a subsection of player name and href html. This helps subsequent calls for tags.
        catered_soup = soup.find('tbody', class_='Table__TBODY')
    
        # Find all player tags
        allPlayers = catered_soup.find_all('tr', class_='Table__TR Table__TR--sm Table__even')
    
        # Run through all player tags, pull player name and href link, append them into arrays.
        for players in allPlayers:
                player_name = players.find('a').text
                player_href = players.find('a', href=True)
    
                player_names.append(player_name)
                href_links.append(player_href['href'])

print(href_links)

async with async_playwright() as searchLinks:
    delay=2
    # Initialize player as 0. We will be incrementing to get name and player position connected to the link.
    Increment_player = 0
    await asyncio.sleep(delay)
    # Create instance of browser
    browser = await searchLinks.chromium.launch(headless=False, slow_mo=50)
    
    # Take all links, go to all webpages, pull game by game stats, store in dataframe.
    for href_link in href_links:
        
        await asyncio.sleep(delay)
        
        # Create new page of browser
        page = await browser.new_page()

        # Find player name using increment technique
        player_name = player_names[Increment_player]

        # Find position name using increment technique
        player_position = player_positions[Increment_player]
        
        # Have list reset after each player. Will be stored in dataframe.
        gamelog_list = []
        # all_rb_list = []
        # all_wrTE_list = []
        # all_P_list = []

        
        x = href_link.split("/_")
        new_href = x[0] + "/gamelog/_" + x[1]
        

        await page.goto(new_href)
        await asyncio.sleep(delay)
        
        # Pull html to pull info from.
        gamelog_html = await page.inner_html('#fittPageContainer')

        # # Create BeautifulSoup from html
        gamelog_soup = BeautifulSoup(gamelog_html, "html.parser")
        
        catered_gamelog_soup = gamelog_soup.find('tbody', class_='Table__TBODY')

        
        await asyncio.sleep(delay)
        
        # Find all player stats tags for even row games
        odd_playerStats = catered_gamelog_soup.find_all('tr', class_='Table__TR Table__TR--sm Table__even')
        
        for odd_player_stat_line in odd_playerStats:
            odd_playerStats = odd_player_stat_line.find_all('td', class_='Table__TD')
            row_data_1 = [td.text.strip() for td in odd_playerStats]
            row_data_1.append(player_name)
            gamelog_list.append(row_data_1)

        # Find all player stats tags for odd row games
        even_playerStats = catered_gamelog_soup.find_all('tr', class_='filled Table__TR Table__TR--sm Table__even')
        
        for even_player_stat_line in even_playerStats:
            even_playerStats = even_player_stat_line.find_all('td', class_='Table__TD')
            row_data_2 = [td.text.strip() for td in even_playerStats]
            row_data_2.append(player_name)
            gamelog_list.append(row_data_2)

        print(gamelog_list)
        # Sort data into the correct dataframe
        if player_position == "QB":
            df_new = pd.DataFrame(gamelog_list, columns=qb_df_column_names)
            await asyncio.sleep(3)
            all_qb_df = pd.concat([all_qb_df, df_new], ignore_index=True)
        elif player_position == "RB":
            df_new = pd.DataFrame(gamelog_list, columns=rb_df_column_names)
            await asyncio.sleep(3)
            all_rb_df = pd.concat([all_rb_df, df_new], ignore_index=True)
        elif (player_position == "WR" or player_position == "TE"):
            df_new = pd.DataFrame(gamelog_list, columns=wrTE_df_column_names)
            await asyncio.sleep(3)
            all_wrTE_df = pd.concat([all_wrTE_df, df_new], ignore_index=True)
        elif player_position == "P":
            df_new = pd.DataFrame(gamelog_list, columns=P_df_column_names)
            await asyncio.sleep(3)
            all_P_df = pd.concat([all_P_df, df_new], ignore_index=True)
        else:
            print("Unexpected Position")

        await asyncio.sleep(delay)

        Increment_player+=1

        await page.close()


    all_qb_df.to_csv('all_qb_df.csv')
    all_rb_df.to_csv('all_rb_df.csv')
    all_wrTE_df.to_csv('all_wrTE_df.csv')
    all_P_df.to_csv('all_P_df.csv')

    print('done')
        



['http://www.espn.com/nfl/player/_/id/4241479/tua-tagovailoa', 'http://www.espn.com/nfl/player/_/id/3046779/jared-goff', 'http://www.espn.com/nfl/player/_/id/2577417/dak-prescott', 'http://www.espn.com/nfl/player/_/id/3918298/josh-allen', 'http://www.espn.com/nfl/player/_/id/4361741/brock-purdy', 'http://www.espn.com/nfl/player/_/id/3139477/patrick-mahomes', 'http://www.espn.com/nfl/player/_/id/4036378/jordan-love', 'http://www.espn.com/nfl/player/_/id/4432577/cj-stroud', 'http://www.espn.com/nfl/player/_/id/3052587/baker-mayfield', 'http://www.espn.com/nfl/player/_/id/4360310/trevor-lawrence', 'http://www.espn.com/nfl/player/_/id/12483/matthew-stafford', 'http://www.espn.com/nfl/player/_/id/4426875/sam-howell', 'http://www.espn.com/nfl/player/_/id/16757/derek-carr', 'http://www.espn.com/nfl/player/_/id/4040715/jalen-hurts', 'http://www.espn.com/nfl/player/_/id/3916387/lamar-jackson', 'http://www.espn.com/nfl/player/_/id/15864/geno-smith', 'http://www.espn.com/nfl/player/_/id/4038524/g