In [1]:
# Import packages
import math
import os
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Notebook Description

This notebook uses Beautiful Soup to scrape historical average draft position (ADP) data from myfantasyleague.com. Based on hours of web searching, this website seems to be the only reputable source of such information (going back to the 2013 season). The data covers the RB, WR, and TE positions and is specific to non-PPR redraft leagues. An additional binary variable, "position ambiguity", is inferred: it flags whether a given player (drafted in rounds 3 through 9) was on a team on which another player at the same position was also drafted in rounds 3 through 9. Separate data sets are generated for all players and for rookies only.

# Web Scraping Historic ADP Data

In [2]:
def myleague_adp_data(year: str, rookies: bool):
    '''
    Function to scrape ADP (average draft position) data for one season from
    https://api.myfantasyleague.com/ and return it as a cleaned data frame.
    This returns all players who were drafted in at least 25% of leagues
    (the report is requested with CUTOFF=25) at the RB, WR and TE positions.

    Parameters
    ----------
    year : str
        Season to request. It is converted with ``str()`` when the URL is built,
        so integer years work too; the value is stored unchanged in ``Year``.
    rookies : bool
        True requests the rookie-only report, False requests all players.

    Returns
    -------
    pandas.DataFrame
        Columns ``FirstName``, ``LastName``, ``Team``, ``Position``, ``ADP`` and
        ``Year``. Free agents (team ``FA`` / ``FA*``) are removed.

    Raises
    ------
    requests.HTTPError
        If the site answers with an error status code.
    ValueError
        If the response does not contain a ``<table class="report">``.
    '''

    # Remove warnings (this switches the pandas option off for the whole session)
    pd.options.mode.chained_assignment = None

    # Base url
    base_url = 'https://api.myfantasyleague.com/'

    # All players or rookie-only url
    # NOTE(review): the notebook description says the data is for non-PPR leagues,
    # but this query sends IS_PPR=1 -- confirm which scoring filter is intended.
    url_spec = f'/reports?R=ADP&POS=RB%2BWR%2BTE&ROOKIES={int(rookies)}&INJURED=1&CUTOFF=25&FCOUNT=0&IS_PPR=1&IS_KEEPER=N&IS_MOCK=1&PERIOD=AUG15'

    # Extract table HTML from input URL; fail loudly on a hung or failed request
    page = requests.get(base_url + str(year) + url_spec + '&PAGE=ALL', timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, 'lxml')
    table = soup.find('table', attrs={'class': 'report'})
    if table is None:
        raise ValueError(f'No ADP report table found for year {year}')

    # Convert table HTML to pandas df (wrapped in StringIO because passing
    # literal HTML strings to read_html is deprecated) and clean it up
    df = pd.read_html(StringIO(str(table)))[0]
    return _clean_adp_table(df, year)


def _clean_adp_table(df, year):
    '''Turn the raw report table into one tidy row per rostered player.'''
    df = df.iloc[:-1].copy()                  # Drop last row (Page: 1 2 3 4 ALL)
    df['Year'] = year
    df['ADP'] = df['Avg Pick'].astype(float)  # Rename column and force to float

    # Expand player info across columns. Ex: McCaffrey, Christian CAR RB
    # Split on the first comma only, then peel team and position off the right
    # so that a multi-word first name cannot produce extra columns.
    df[['LastName', 'Player']] = df['Player'].str.split(', ', n=1, expand=True)
    df[['FirstName', 'Team', 'Position']] = df['Player'].str.rsplit(n=2, expand=True)

    # Remove punctuation, then edit names for consistency with the naming
    # convention in other data sets (no suffix). The suffix pattern is anchored
    # to the end of the name so it cannot eat letters inside it ("De Vries").
    punctuation = r'[^\w\s]'
    suffix = r'\s+(?:III|II|IV|V|Sr|Jr)\s*$'
    df['FirstName'] = df['FirstName'].str.replace(punctuation, '', regex=True)
    df['LastName'] = df['LastName'].str.replace(punctuation, '', regex=True)
    df['LastName'] = df['LastName'].str.replace(suffix, '', regex=True)

    df = df[['FirstName', 'LastName', 'Team', 'Position', 'ADP', 'Year']]  # Order columns
    df = df[~df['Team'].isin(['FA', 'FA*'])]                               # Remove free agents

    # Convert team names for consistency
    team_conv = {'GBP': 'GNB', 'JAC': 'JAX', 'KCC': 'KAN', 'NOS': 'NOR', 'NEP': 'NWE', 'RAM': 'LAR', 'SDC': 'SDG', 'TBB': 'TAM'}
    df = df.replace({'Team': team_conv})

    return df


In [3]:
def add_competition_data(adp_df):
    '''
    Function to get positional competition data for each player, including
        - Positional rank on team (TeamPosRank, e.g. "RB1", "WR2"), by ascending ADP
        - ADP of teammates in same position (PosRank1_ADP / PosRank2_ADP: the
          lowest and second-lowest ADP in the player's team/position group;
          PosRank2_ADP is NaN when the group has a single player)
        - Positional ambiguity (T/F)
        - Handcuff (T/F) - not yet added

    A team/position group is ambiguous when its two lowest ADPs are both greater
    than 24 and at most 108 (rounds 3 through 9). Players in such a group whose
    own ADP is above 108 are flagged False.

    Parameters
    ----------
    adp_df : pandas.DataFrame
        Must contain ``Team``, ``Position`` and ``ADP`` columns. Only rows whose
        position is RB, WR or TE are kept.

    Returns
    -------
    pandas.DataFrame
        The input rows plus the four new columns, sorted by ADP. The index is
        the row position before that final sort (teams alphabetical, then
        RB/WR/TE, then ADP), so it is reproducible between runs.
    '''

    # Remove warnings (this switches the pandas option off for the whole session)
    pd.options.mode.chained_assignment = None

    # Pick window for "rounds 3 through 9": ADP greater than 24 and at most 108
    window_start, window_end = 24, 108
    positions = ['RB', 'WR', 'TE']

    # Sorted team list (instead of a set) keeps the output order deterministic
    teams = sorted(adp_df['Team'].dropna().unique())

    group_frames = []
    # Get positional competition data for each team by position
    for team in teams:
        for position in positions:
            in_group = (adp_df['Team'] == team) & (adp_df['Position'] == position)
            # Sort by ADP so the rank labels agree with PosRank1_ADP/PosRank2_ADP
            # even when the input is not already in draft order
            group = (adp_df[in_group]
                     .sort_values(by='ADP', kind='mergesort')
                     .reset_index(drop=True))
            if group.empty:
                continue  # Nothing to rank; also keeps empty frames out of concat

            group['TeamPosRank'] = [f'{position}{rank}' for rank in range(1, len(group) + 1)]

            # RB1/WR1/TE1 and RB2/WR2/TE2 ADPs within the team
            lead_adp = group['ADP'].iloc[0]
            second_adp = group['ADP'].iloc[1] if len(group) > 1 else np.nan
            group['PosRank1_ADP'] = lead_adp
            group['PosRank2_ADP'] = second_adp

            # Ambiguous: multiple players in a position drafted between rounds 3
            # through 9. Comparisons against NaN are False, so a one-player
            # group is never ambiguous.
            group['PosAmbiguity'] = bool(
                window_start < lead_adp <= window_end
                and window_start < second_adp <= window_end
            )

            group_frames.append(group)

    if not group_frames:
        # No RB/WR/TE rows at all: return an empty frame with the same columns
        return adp_df.iloc[0:0].assign(
            TeamPosRank=pd.Series(dtype='object'),
            PosRank1_ADP=pd.Series(dtype='float64'),
            PosRank2_ADP=pd.Series(dtype='float64'),
            PosAmbiguity=pd.Series(dtype='bool'),
        )

    # Concatenate the data frames created for each team
    output_df = pd.concat(group_frames).reset_index(drop=True)
    # A player drafted after the 9th round can't be part of the ambiguity
    output_df['PosAmbiguity'] = output_df['PosAmbiguity'] & ~(output_df['ADP'] > window_end)
    # Stable sort so players with equal ADP keep a reproducible order
    return output_df.sort_values(by='ADP', kind='mergesort')


## All Players

In [4]:
# Scrape every season from 2013 through 2022 for all players, add the
# competition columns to each season's frame, stack the seasons, and save to CSV
years = list(np.arange(2013, 2023))
adp_df = pd.concat([
    add_competition_data(myleague_adp_data(year, rookies=False))
    for year in years
])
adp_df.to_csv(os.getcwd() + "/../myfantasyleague-adp-data-2013-2022.csv", index=False)


## Rookies Only

In [5]:
# Scrape the rookie-only ADP report for every season from 2013 through 2022,
# stack the seasons into one data frame, and save it to CSV
years = list(np.arange(2013, 2023))
adpRookies_df = pd.concat([myleague_adp_data(year, rookies=True) for year in years])
adpRookies_df.to_csv(os.getcwd() + "/../myfantasyleague-adp-data-rookies-2013-2022.csv", index=False)
