In [1]:
# OVERVIEW

# Background: 
# The 1st Round of the NFL Draft is a phenomenon that can make or break a team's next season.
# We will experiment with the theory that if a team did well on Offense and poorly on Defense in the previous season, 
# then they would use their highest pick on a player that can improve their Defense (or vice-versa)

# Goal: Predict whether or not a team will draft an Offensive or Defensive player in the 1st Round of the NFL Draft

# Data Points:
# Independent Variable: What unit for the position each team drafted in the 1st Round (Offense=OFF or Defense=DEF)
# (teams can have multiple 1st round picks)
# Dependent Variable: A team's stats and league-wide rankings for the previous season

# Example:
# Bengals 2020 Draft QB=Offense=OFF - Bengals 2019 total passing yards, total rushing yards, turnovers, points for/against, etc.
# Bengals 2019 Stats - https://www.pro-football-reference.com/teams/cin/2019.htm

# Data Scraped From:
# 1st Round Draft History Data - http://www.drafthistory.com/index.php/rounds/round_1
# Team Stats and Rankings - https://www.pro-football-reference.com

In [2]:
# IMPORTS
# DataFrame
import pandas as pd

# Web Scraping
from requests import get
from bs4 import BeautifulSoup

# Math
import math

In [3]:
# CONSTANTS
CURRENT_YEAR = 2020
YEARS = 20
NUM_NEIGHBORS = 5
TEAM_DICT = {'Bengals':'cin','Redskins':'was','Lions':'det','Giants':'nyg','Dolphins':'mia','Chargers':'sdg',
            'Panthers':'car','Cardinals':'crd','Jaguars':'jax','Browns':'cle','Jets':'nyj','Raiders':'rai',
             'Buccaneers':'tam','49ers':'sfo','Broncos':'den','Falcons':'atl','Cowboys':'dal','Eagles':'phi',
             'Vikings':'min','Saints':'nor','Packers':'gnb','Seahawks':'sea','Ravens':'rav','Titans':'oti',
             'Chiefs':'kan','Colts':'clt','Texans':'htx','Bears':'chi','Steelers':'pit','Rams':'ram','Bills':'buf',
             'Patriots':'nwe'}
EMPTY_COLS = ['Off. Rank Ply', 'Off. Rank Y/P', 'Off. Rank Cmp', 'Off. Rank 1stD', 'Off. Rank 1stD', 'Off. Rank Pen',
             'Off. Rank Yds', 'Off. Rank 1stPy', 'Off. Rank #Dr', 'Def. Rank Ply', 'Def. Rank Y/P', 'Def. Rank Cmp',
             'Def. Rank 1stD', 'Def. Rank 1stD', 'Def. Rank Pen', 'Def. Rank Yds', 'Def. Rank 1stPy', 'Def. Rank #Dr',
             'Off. Rank 1stD.1', 'Off. Rank 1stD.2', 'Off. Rank Yds.3', 'Def. Rank 1stD.1', 'Def. Rank 1stD.2', 'Def. Rank Yds.3']
UNIT_DICT = {'QB':'OFF','RB':'OFF','WR':'OFF','TE':'OFF',' C':'OFF',' G':'OFF',' T':'OFF','DT':'DEF','DE':'DEF','LB':'DEF',
             'DB':'DEF'}
BASIC_FACTORS = ['Position Drafted', 'Team', 'Draft Year', 'Pick Number']
TEAM_FACTORS = ['Team PF', 'Team TO', 'Team Att', 'Team Yds.1', 'Team TD', 'Team Int', 'Team Att.1', 'Team Yds.2', 'Team TD.1', 'Team Y/A', 'Team 1stD.2']
OPP_FACTORS = ['Opp. PF', 'Opp. TO', 'Opp. Att', 'Opp. Yds.1', 'Opp. TD', 'Opp. Int', 'Opp. Att.1', 'Opp. Yds.2', 'Opp. TD.1', 'Opp. Y/A', 'Opp. 1stD.2']
OFF_RANK_FACTORS = ['Off. Rank PF', 'Off. Rank Yds.1', 'Off. Rank TO', 'Off. Rank Att']
DEF_RANK_FACTORS = ['Def. Rank PF', 'Def. Rank Yds.1', 'Def. Rank TO', 'Def. Rank Att']
NEXT_DRAFT_ORDER = ['Jaguars', 'Jets', 'Dolphins', 'Falcons', 'Bengals', 'Eagles', 'Lions', 'Giants', 'Panthers', 'Broncos', 
                    'Cowboys', 'Chargers', 'Vikings', 'Patriots', '49ers', 'Raiders', 'Cardinals', 'Colts', 'Redskins', 'Bears', 
                    'Rams', 'Browns', 'Dolphins', 'Ravens', 'Titans', 'Buccaneers', 'Jets', 'Steelers',
                    'Saints', 'Bills', 'Packers', 'Chiefs']

In [4]:
# WEB SCRAPING FUNCTIONS
def get_past_first_rounders() -> list:
    url = 'http://www.drafthistory.com/index.php/rounds/round_1'
    response = get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find(id='main')
    row_containers = table.table.find_all('tr')
    row_containers = row_containers[2:]
    labels = ['Year','Round','Pick','Player','Name','Team','Position','College']
    
    all_rows = []
    should_break = False
    for i in row_containers:
        new_row = []
        cols = i.find_all('td')
        for j in range(0, len(cols)):
            if j == 0:
                try:
                    year = int(cols[j].text)
                    if year == CURRENT_YEAR - YEARS:
                        should_break=True
                        break
                    new_row.append(cols[j].text)
                except:
                    new_row.append(year)
            else:
                new_row.append(cols[j].text)
        if should_break:
            break
        if new_row:
            all_rows.append(new_row)
    
    df = pd.DataFrame(all_rows,columns=labels)
    df.drop(columns=['Round','Player','College'], inplace=True)
    return df

def get_team_stats_by_year_url(team_name: str, year) -> str:
    base_url = 'https://www.pro-football-reference.com/teams/'
    end_url = '.htm'
    try: 
        team_id = TEAM_DICT[team_name]
    except:
        team_id = team_name
    return base_url + team_id + '/' + str(year) + end_url

def get_team_stats(url) -> 'DataFrame':
    response = get(url)
    if response.status_code == 404:
        return pd.DataFrame()
    soup = BeautifulSoup(response.text,'html.parser')
    table_container = soup.find(id='all_team_stats')
    if table_container:
        print('Scraping', url)
    else:
        print(url)
    table = table_container.find(class_='table_outer_container').div.table
    
    label_containers = table.thead.find_all('tr')[1].find_all('th')
    labels = []
    for i in label_containers:
        labels.append(str(i.text))
        
    list_of_rows = []
    row_containers = table.tbody.find_all('tr')
    for i in row_containers:
        row = []
        row.append(i.th)
        stat_containers = i.find_all('td')
        for j in stat_containers:
            try: 
                num = float(j.text)
                row.append(num)
            except:
                row.append(str(j.text).strip())
        list_of_rows.append(row)
    
    return pd.DataFrame(list_of_rows,columns=labels)

def convert_team_stats_to_one_row(df) -> 'DataFrame':
    team_columns = []
    opp_columns = []
    off_rank_columns = []
    def_rank_columns = []
    
    for i in df.columns:
        team_columns.append('Team ' + i)
        opp_columns.append('Opp. ' + i)
        off_rank_columns.append('Off. Rank ' + i)
        def_rank_columns.append('Def. Rank ' + i)
        
    row1 = pd.DataFrame([list(df.loc[0])], columns=team_columns)
    row2 = pd.DataFrame([list(df.loc[1])], columns=opp_columns)
    row3 = pd.DataFrame([list(df.loc[2])], columns=off_rank_columns)
    row4 = pd.DataFrame([list(df.loc[3])], columns=def_rank_columns)
    
    new_df = pd.concat([row1, row2, row3, row4], axis=1).reindex(row1.index)
    return new_df

def make_team_stats_one_row(df, draft_year, pick_number, team_name, position_drafted) -> 'DataFrame':
    df.drop(columns=['Team Player', 'Opp. Player', 'Off. Rank Player', 'Def. Rank Player'], inplace=True)
    df.insert(0, 'Pick Number', [pick_number], True)
    df.insert(0, 'Draft Year', [draft_year], True)
    df.insert(0, 'Team', [team_name], True)
    df.insert(0, 'Position Drafted', [position_drafted], True)
    return df

def get_raw_past_data() -> 'DataFrame':
    first_rounders = get_past_first_rounders()
    frames = []
    for i in first_rounders.index:
        row = list(first_rounders.loc[i])

        draft_year = row[0]
        pick_number = row[1]
        team_name = row[3]
        position_drafted = row[4]

        stats_url = get_team_stats_by_year_url(team_name, int(draft_year)-1)
        team_stats_df = get_team_stats(stats_url)
        if team_stats_df.empty:
            print('Not Found', stats_url)
            continue
        else:
            pass
        one_row = convert_team_stats_to_one_row(team_stats_df)
        final_row = make_team_stats_one_row(one_row, draft_year, pick_number, team_name, position_drafted)
        frames.append(final_row)
    final_df = pd.concat(frames)
    return final_df

In [5]:
raw = get_raw_past_data()
raw.to_csv('Raw Data For Last ' + str(YEARS) + ' Drafts.csv', encoding = 'utf-8-sig', index=False)

Scraping https://www.pro-football-reference.com/teams/cin/2019.htm
Scraping https://www.pro-football-reference.com/teams/was/2019.htm
Scraping https://www.pro-football-reference.com/teams/det/2019.htm
Scraping https://www.pro-football-reference.com/teams/nyg/2019.htm
Scraping https://www.pro-football-reference.com/teams/mia/2019.htm
Scraping https://www.pro-football-reference.com/teams/sdg/2019.htm
Scraping https://www.pro-football-reference.com/teams/car/2019.htm
Scraping https://www.pro-football-reference.com/teams/crd/2019.htm
Scraping https://www.pro-football-reference.com/teams/jax/2019.htm
Scraping https://www.pro-football-reference.com/teams/cle/2019.htm
Scraping https://www.pro-football-reference.com/teams/nyj/2019.htm
Scraping https://www.pro-football-reference.com/teams/rai/2019.htm
Scraping https://www.pro-football-reference.com/teams/tam/2019.htm
Scraping https://www.pro-football-reference.com/teams/sfo/2019.htm
Scraping https://www.pro-football-reference.com/teams/den/2019

Scraping https://www.pro-football-reference.com/teams/dal/2016.htm
Scraping https://www.pro-football-reference.com/teams/cle/2016.htm
Scraping https://www.pro-football-reference.com/teams/pit/2016.htm
Scraping https://www.pro-football-reference.com/teams/sfo/2016.htm
Scraping https://www.pro-football-reference.com/teams/nor/2016.htm
Scraping https://www.pro-football-reference.com/teams/ram/2015.htm
Scraping https://www.pro-football-reference.com/teams/phi/2015.htm
Scraping https://www.pro-football-reference.com/teams/sdg/2015.htm
Scraping https://www.pro-football-reference.com/teams/dal/2015.htm
Scraping https://www.pro-football-reference.com/teams/jax/2015.htm
Scraping https://www.pro-football-reference.com/teams/rav/2015.htm
Scraping https://www.pro-football-reference.com/teams/sfo/2015.htm
Scraping https://www.pro-football-reference.com/teams/oti/2015.htm
Scraping https://www.pro-football-reference.com/teams/chi/2015.htm
Scraping https://www.pro-football-reference.com/teams/nyg/2015

Scraping https://www.pro-football-reference.com/teams/clt/2012.htm
Scraping https://www.pro-football-reference.com/teams/min/2012.htm
Scraping https://www.pro-football-reference.com/teams/gnb/2012.htm
Scraping https://www.pro-football-reference.com/teams/htx/2012.htm
Scraping https://www.pro-football-reference.com/teams/den/2012.htm
Scraping https://www.pro-football-reference.com/teams/min/2012.htm
Scraping https://www.pro-football-reference.com/teams/ram/2012.htm
Scraping https://www.pro-football-reference.com/teams/dal/2012.htm
Scraping https://www.pro-football-reference.com/teams/rav/2012.htm
Scraping https://www.pro-football-reference.com/teams/clt/2011.htm
Scraping https://www.pro-football-reference.com/teams/was/2011.htm
Scraping https://www.pro-football-reference.com/teams/cle/2011.htm
Scraping https://www.pro-football-reference.com/teams/min/2011.htm
Scraping https://www.pro-football-reference.com/teams/jax/2011.htm
Scraping https://www.pro-football-reference.com/teams/dal/2011

Scraping https://www.pro-football-reference.com/teams/phi/2008.htm
Scraping https://www.pro-football-reference.com/teams/det/2008.htm
Scraping https://www.pro-football-reference.com/teams/cle/2008.htm
Scraping https://www.pro-football-reference.com/teams/min/2008.htm
Scraping https://www.pro-football-reference.com/teams/rav/2008.htm
Scraping https://www.pro-football-reference.com/teams/atl/2008.htm
Scraping https://www.pro-football-reference.com/teams/mia/2008.htm
Scraping https://www.pro-football-reference.com/teams/gnb/2008.htm
Scraping https://www.pro-football-reference.com/teams/clt/2008.htm
Scraping https://www.pro-football-reference.com/teams/buf/2008.htm
Scraping https://www.pro-football-reference.com/teams/nyg/2008.htm
Scraping https://www.pro-football-reference.com/teams/oti/2008.htm
Scraping https://www.pro-football-reference.com/teams/crd/2008.htm
Scraping https://www.pro-football-reference.com/teams/pit/2008.htm
Scraping https://www.pro-football-reference.com/teams/mia/2007

Scraping https://www.pro-football-reference.com/teams/htx/2004.htm
Scraping https://www.pro-football-reference.com/teams/cin/2004.htm
Scraping https://www.pro-football-reference.com/teams/min/2004.htm
Scraping https://www.pro-football-reference.com/teams/ram/2004.htm
Scraping https://www.pro-football-reference.com/teams/dal/2004.htm
Scraping https://www.pro-football-reference.com/teams/jax/2004.htm
Scraping https://www.pro-football-reference.com/teams/rav/2004.htm
Scraping https://www.pro-football-reference.com/teams/rai/2004.htm
Scraping https://www.pro-football-reference.com/teams/gnb/2004.htm
Scraping https://www.pro-football-reference.com/teams/was/2004.htm
Scraping https://www.pro-football-reference.com/teams/sea/2004.htm
Scraping https://www.pro-football-reference.com/teams/atl/2004.htm
Scraping https://www.pro-football-reference.com/teams/sdg/2004.htm
Scraping https://www.pro-football-reference.com/teams/clt/2004.htm
Scraping https://www.pro-football-reference.com/teams/pit/2004

Scraping https://www.pro-football-reference.com/teams/car/2000.htm
Scraping https://www.pro-football-reference.com/teams/ram/2000.htm
Scraping https://www.pro-football-reference.com/teams/jax/2000.htm
Scraping https://www.pro-football-reference.com/teams/tam/2000.htm
Scraping https://www.pro-football-reference.com/teams/was/2000.htm
Scraping https://www.pro-football-reference.com/teams/nyj/2000.htm
Scraping https://www.pro-football-reference.com/teams/sea/2000.htm
Scraping https://www.pro-football-reference.com/teams/det/2000.htm
Scraping https://www.pro-football-reference.com/teams/pit/2000.htm
Scraping https://www.pro-football-reference.com/teams/ram/2000.htm
Scraping https://www.pro-football-reference.com/teams/buf/2000.htm
Scraping https://www.pro-football-reference.com/teams/nyg/2000.htm
Scraping https://www.pro-football-reference.com/teams/nor/2000.htm
Scraping https://www.pro-football-reference.com/teams/den/2000.htm
Scraping https://www.pro-football-reference.com/teams/phi/2000

In [6]:
# Notice that certain values are not Null=NaN, but empty
raw

Unnamed: 0,Position Drafted,Team,Draft Year,Pick Number,Team PF,Team Yds,Team Ply,Team Y/P,Team TO,Team FL,...,Def. Rank Yds,Def. Rank 1stPy,Def. Rank #Dr,Def. Rank Sc%,Def. Rank TO%,Def. Rank Start,Def. Rank Time,Def. Rank Plays,Def. Rank Yds.1,Def. Rank Pts
0,QB,Bengals,2020,1,279.0,5169.0,1049.0,4.9,30.0,14.0,...,,,,9.0,27.0,12.0,18.0,15.0,28.0,22.0
0,DE,Redskins,2020,2,266.0,4395.0,885.0,5.0,21.0,8.0,...,,,,4.0,15.0,31.0,31.0,29.0,29.0,30.0
0,DB,Lions,2020,3,341.0,5549.0,1021.0,5.4,23.0,8.0,...,,,,7.0,25.0,3.0,21.0,28.0,30.0,24.0
0,T,Giants,2020,4,341.0,5416.0,1012.0,5.4,33.0,16.0,...,,,,8.0,29.0,25.0,19.0,17.0,20.0,25.0
0,QB,Dolphins,2020,5,306.0,4960.0,1022.0,4.9,26.0,8.0,...,,,,1.0,28.0,27.0,20.0,20.0,26.0,32.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,RB,Vikings,2001,27,397.0,5961.0,958.0,6.2,28.0,10.0,...,,,,6.0,31.0,7.0,30.0,30.0,30.0,27.0
0,DB,Raiders,2001,28,479.0,5776.0,1023.0,5.6,20.0,9.0,...,,,,24.0,7.0,1.0,5.0,15.0,18.0,11.0
0,DE,Rams,2001,29,540.0,7075.0,1014.0,7.0,35.0,12.0,...,,,,2.0,22.0,25.0,15.0,14.0,24.0,30.0
0,WR,Colts,2001,30,429.0,6141.0,1026.0,6.0,29.0,14.0,...,,,,12.0,23.0,3.0,25.0,28.0,25.0,19.0


In [7]:
# Recreate raw data from file so that empty values are not perceived as Null=NaN
raw = pd.read_csv('Raw Data For Last ' + str(YEARS) + ' Drafts.csv', encoding = 'utf-8')
raw

Unnamed: 0,Position Drafted,Team,Draft Year,Pick Number,Team PF,Team Yds,Team Ply,Team Y/P,Team TO,Team FL,...,Def. Rank Yds.3,Def. Rank 1stPy,Def. Rank #Dr,Def. Rank Sc%,Def. Rank TO%,Def. Rank Start,Def. Rank Time,Def. Rank Plays,Def. Rank Yds.4,Def. Rank Pts
0,QB,Bengals,2020,1,279.0,5169.0,1049.0,4.9,30.0,14.0,...,,,,9.0,27.0,12.0,18.0,15.0,28.0,22.0
1,DE,Redskins,2020,2,266.0,4395.0,885.0,5.0,21.0,8.0,...,,,,4.0,15.0,31.0,31.0,29.0,29.0,30.0
2,DB,Lions,2020,3,341.0,5549.0,1021.0,5.4,23.0,8.0,...,,,,7.0,25.0,3.0,21.0,28.0,30.0,24.0
3,T,Giants,2020,4,341.0,5416.0,1012.0,5.4,33.0,16.0,...,,,,8.0,29.0,25.0,19.0,17.0,20.0,25.0
4,QB,Dolphins,2020,5,306.0,4960.0,1022.0,4.9,26.0,8.0,...,,,,1.0,28.0,27.0,20.0,20.0,26.0,32.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631,RB,Vikings,2001,27,397.0,5961.0,958.0,6.2,28.0,10.0,...,,,,6.0,31.0,7.0,30.0,30.0,30.0,27.0
632,DB,Raiders,2001,28,479.0,5776.0,1023.0,5.6,20.0,9.0,...,,,,24.0,7.0,1.0,5.0,15.0,18.0,11.0
633,DE,Rams,2001,29,540.0,7075.0,1014.0,7.0,35.0,12.0,...,,,,2.0,22.0,25.0,15.0,14.0,24.0,30.0
634,WR,Colts,2001,30,429.0,6141.0,1026.0,6.0,29.0,14.0,...,,,,12.0,23.0,3.0,25.0,28.0,25.0,19.0


In [8]:
# Data Cleaning
def clean_raw_df(raw, save, keep_id) -> 'DataFrame':
    
    # Convert non-Numerical data types to Numerical
    raw = _convert_string_data(raw)
    
    # Keep only the columns of data that we want
    raw = _isolate_key_columns(raw)
    
    # Create an ID column for later reference
    raw = _add_id_column(raw)
    
    # Save a copy for reference to identify what team/position/year occurred for stats
    if save:
        _save_reference_df(raw)
    
    # Remove Identifying Columns
    if keep_id == False:
        raw = _remove_id_columns(raw)
    
    return raw

def _convert_string_data(df) -> 'DataFrame':
    team_start_list = []
    opp_start_list = []
    for i in list(df['Team Start']):
        team_start_list.append(float(str(i)[4:].strip()))
    for i in list(df['Opp. Start']):
        opp_start_list.append(float(str(i)[4:].strip()))
    
    df['Team Start'] = team_start_list
    df['Opp. Start'] = opp_start_list
    
    team_time_list = []
    opp_time_list = []
    for i in list(df['Team Time']):
        team_time_list.append(float((int(str(i)[0])*60)+int(str(i)[2:])))
    for i in list(df['Opp. Time']):
        opp_time_list.append(float((int(str(i)[0])*60)+int(str(i)[2:])))
        
    df['Team Time'] = team_time_list
    df['Opp. Time'] = opp_time_list
    
    return df
    
def _isolate_key_columns(df):
    factors = BASIC_FACTORS + TEAM_FACTORS + OPP_FACTORS + OFF_RANK_FACTORS + DEF_RANK_FACTORS
    df = df[factors]
    return df

def _add_id_column(df) -> 'DataFrame':
    df = df.reset_index()
    df = df.rename(columns={'index':'ID'})
    return df
    
def _save_reference_df(df):
    reference_df = df.copy(deep=True)
    reference_df.to_csv('Clean Reference For Last ' + str(YEARS) + ' Drafts.csv', encoding = 'utf-8-sig', index=False)
    
def _remove_id_columns(df):
    df = df.drop(columns=['Position Drafted', 'Team', 'Draft Year'])
    return df

In [9]:
# We will use this dataframe for our model since it only has Numerical values, 
# but we will not use the ID when calculating distance
clean = clean_raw_df(raw, True, False)
clean

Unnamed: 0,ID,Pick Number,Team PF,Team TO,Team Att,Team Yds.1,Team TD,Team Int,Team Att.1,Team Yds.2,...,Opp. Y/A,Opp. 1stD.2,Off. Rank PF,Off. Rank Yds.1,Off. Rank TO,Off. Rank Att,Def. Rank PF,Def. Rank Yds.1,Def. Rank TO,Def. Rank Att
0,0,1,279.0,30.0,616.0,3652.0,18.0,16.0,385.0,1517.0,...,4.7,124.0,30.0,19.0,27.0,6.0,25.0,21.0,28.0,2.0
1,1,2,266.0,21.0,479.0,2812.0,18.0,13.0,356.0,1583.0,...,4.7,120.0,32.0,32.0,16.0,28.0,27.0,18.0,15.0,11.0
2,2,3,341.0,23.0,571.0,3900.0,28.0,15.0,407.0,1649.0,...,4.1,94.0,18.0,10.0,19.0,17.0,26.0,32.0,24.0,31.0
3,3,4,341.0,33.0,607.0,3731.0,30.0,17.0,362.0,1685.0,...,3.9,100.0,18.0,18.0,30.0,9.0,30.0,28.0,28.0,17.0
4,4,5,306.0,26.0,615.0,3804.0,22.0,18.0,349.0,1156.0,...,4.5,113.0,25.0,12.0,25.0,7.0,32.0,26.0,28.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631,631,27,397.0,28.0,495.0,3832.0,33.0,18.0,428.0,2129.0,...,4.5,110.0,5.0,7.0,13.0,23.0,24.0,28.0,31.0,28.0
632,632,28,479.0,20.0,475.0,3306.0,28.0,11.0,520.0,2470.0,...,4.0,85.0,3.0,15.0,2.0,25.0,9.0,25.0,7.0,30.0
633,633,29,540.0,35.0,587.0,5232.0,37.0,23.0,383.0,1843.0,...,4.4,95.0,1.0,1.0,24.0,3.0,31.0,27.0,20.0,17.0
634,634,30,429.0,29.0,571.0,4282.0,33.0,15.0,435.0,1859.0,...,4.3,109.0,4.0,2.0,15.0,8.0,15.0,18.0,25.0,14.0


In [10]:
# We will use this dataframe to reference the Team/Year/Position Drafted for our test values and our test's closest neighbors
reference = pd.read_csv('Clean Reference For Last ' + str(YEARS) + ' Drafts.csv', encoding = 'utf-8')
reference

Unnamed: 0,ID,Position Drafted,Team,Draft Year,Pick Number,Team PF,Team TO,Team Att,Team Yds.1,Team TD,...,Opp. Y/A,Opp. 1stD.2,Off. Rank PF,Off. Rank Yds.1,Off. Rank TO,Off. Rank Att,Def. Rank PF,Def. Rank Yds.1,Def. Rank TO,Def. Rank Att
0,0,QB,Bengals,2020,1,279.0,30.0,616.0,3652.0,18.0,...,4.7,124.0,30.0,19.0,27.0,6.0,25.0,21.0,28.0,2.0
1,1,DE,Redskins,2020,2,266.0,21.0,479.0,2812.0,18.0,...,4.7,120.0,32.0,32.0,16.0,28.0,27.0,18.0,15.0,11.0
2,2,DB,Lions,2020,3,341.0,23.0,571.0,3900.0,28.0,...,4.1,94.0,18.0,10.0,19.0,17.0,26.0,32.0,24.0,31.0
3,3,T,Giants,2020,4,341.0,33.0,607.0,3731.0,30.0,...,3.9,100.0,18.0,18.0,30.0,9.0,30.0,28.0,28.0,17.0
4,4,QB,Dolphins,2020,5,306.0,26.0,615.0,3804.0,22.0,...,4.5,113.0,25.0,12.0,25.0,7.0,32.0,26.0,28.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631,631,RB,Vikings,2001,27,397.0,28.0,495.0,3832.0,33.0,...,4.5,110.0,5.0,7.0,13.0,23.0,24.0,28.0,31.0,28.0
632,632,DB,Raiders,2001,28,479.0,20.0,475.0,3306.0,28.0,...,4.0,85.0,3.0,15.0,2.0,25.0,9.0,25.0,7.0,30.0
633,633,DE,Rams,2001,29,540.0,35.0,587.0,5232.0,37.0,...,4.4,95.0,1.0,1.0,24.0,3.0,31.0,27.0,20.0,17.0
634,634,WR,Colts,2001,30,429.0,29.0,571.0,4282.0,33.0,...,4.3,109.0,4.0,2.0,15.0,8.0,15.0,18.0,25.0,14.0


In [11]:
# Split into Train / Test Dataframes to Test Model Accuracy on 2020 NFL Draft

def split_dfs_for_testing(final_df) -> list:

    test_df = final_df.iloc[:32]
    train_df = final_df.iloc[32:]
    print(train_df.shape, test_df.shape)

    combine = [train_df, test_df]
    return combine

In [12]:
combine = split_dfs_for_testing(clean)
train_df_1 = combine[0]
test_df_1 = combine[1]
train_df_1.to_csv('Train Data For ' + str(CURRENT_YEAR) + ' Draft.csv', encoding = 'utf-8-sig', index=False)
test_df_1.to_csv('Test Data For ' + str(CURRENT_YEAR) + ' Draft.csv', encoding = 'utf-8-sig', index=False)
train_df_1

(604, 32) (32, 32)


Unnamed: 0,ID,Pick Number,Team PF,Team TO,Team Att,Team Yds.1,Team TD,Team Int,Team Att.1,Team Yds.2,...,Opp. Y/A,Opp. 1stD.2,Off. Rank PF,Off. Rank Yds.1,Off. Rank TO,Off. Rank Att,Def. Rank PF,Def. Rank Yds.1,Def. Rank TO,Def. Rank Att
32,32,1,225.0,28.0,495.0,2523.0,15.0,18.0,355.0,1342.0,...,4.9,139.0,32.0,32.0,27.0,29.0,26.0,4.0,28.0,6.0
33,33,2,342.0,32.0,532.0,3867.0,26.0,20.0,423.0,1902.0,...,4.1,104.0,21.0,15.0,30.0,20.0,28.0,11.0,32.0,14.0
34,34,3,333.0,30.0,524.0,3165.0,18.0,19.0,410.0,1622.0,...,4.6,104.0,23.0,25.0,29.0,22.0,29.0,24.0,16.0,24.0
35,35,4,290.0,24.0,556.0,3751.0,19.0,10.0,387.0,1628.0,...,4.7,108.0,28.0,18.0,22.0,16.0,32.0,19.0,22.0,1.0
36,36,5,396.0,35.0,625.0,5125.0,36.0,26.0,389.0,1523.0,...,4.7,122.0,12.0,1.0,32.0,4.0,31.0,26.0,22.0,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631,631,27,397.0,28.0,495.0,3832.0,33.0,18.0,428.0,2129.0,...,4.5,110.0,5.0,7.0,13.0,23.0,24.0,28.0,31.0,28.0
632,632,28,479.0,20.0,475.0,3306.0,28.0,11.0,520.0,2470.0,...,4.0,85.0,3.0,15.0,2.0,25.0,9.0,25.0,7.0,30.0
633,633,29,540.0,35.0,587.0,5232.0,37.0,23.0,383.0,1843.0,...,4.4,95.0,1.0,1.0,24.0,3.0,31.0,27.0,20.0,17.0
634,634,30,429.0,29.0,571.0,4282.0,33.0,15.0,435.0,1859.0,...,4.3,109.0,4.0,2.0,15.0,8.0,15.0,18.0,25.0,14.0


In [13]:
test_df_1

Unnamed: 0,ID,Pick Number,Team PF,Team TO,Team Att,Team Yds.1,Team TD,Team Int,Team Att.1,Team Yds.2,...,Opp. Y/A,Opp. 1stD.2,Off. Rank PF,Off. Rank Yds.1,Off. Rank TO,Off. Rank Att,Def. Rank PF,Def. Rank Yds.1,Def. Rank TO,Def. Rank Att
0,0,1,279.0,30.0,616.0,3652.0,18.0,16.0,385.0,1517.0,...,4.7,124.0,30.0,19.0,27.0,6.0,25.0,21.0,28.0,2.0
1,1,2,266.0,21.0,479.0,2812.0,18.0,13.0,356.0,1583.0,...,4.7,120.0,32.0,32.0,16.0,28.0,27.0,18.0,15.0,11.0
2,2,3,341.0,23.0,571.0,3900.0,28.0,15.0,407.0,1649.0,...,4.1,94.0,18.0,10.0,19.0,17.0,26.0,32.0,24.0,31.0
3,3,4,341.0,33.0,607.0,3731.0,30.0,17.0,362.0,1685.0,...,3.9,100.0,18.0,18.0,30.0,9.0,30.0,28.0,28.0,17.0
4,4,5,306.0,26.0,615.0,3804.0,22.0,18.0,349.0,1156.0,...,4.5,113.0,25.0,12.0,25.0,7.0,32.0,26.0,28.0,14.0
5,5,6,337.0,31.0,597.0,4426.0,24.0,20.0,366.0,1453.0,...,4.2,101.0,21.0,6.0,29.0,10.0,14.0,5.0,32.0,1.0
6,6,7,340.0,35.0,633.0,3650.0,17.0,21.0,386.0,1819.0,...,5.2,128.0,20.0,20.0,31.0,2.0,31.0,13.0,17.0,12.0
7,7,8,361.0,18.0,554.0,3477.0,20.0,12.0,396.0,1990.0,...,4.4,94.0,16.0,24.0,9.0,18.0,28.0,31.0,25.0,28.0
8,8,9,300.0,20.0,589.0,3760.0,24.0,8.0,389.0,1708.0,...,5.1,114.0,26.0,16.0,13.0,12.0,21.0,16.0,22.0,5.0
9,9,10,335.0,28.0,539.0,3554.0,22.0,21.0,393.0,1901.0,...,5.0,124.0,22.0,22.0,26.0,19.0,20.0,7.0,19.0,3.0


In [14]:
# DATA PREPARATION FUNCTIONS

def convert_df_to_weighted_df(df):
    std_list = list(df.std(axis = 0, skipna= True))
    mean_list = list(df.mean(axis=0, skipna= True))
    frames = [pd.DataFrame(df.loc[df.index[0]], columns=df.columns)]
    for i in range(df.index[0], df.index[0]+len(df.index)):
        row_list = list(df.loc[i])
        weighted_row = []
        for j in range(len(row_list)):
            if j == 0:
                weighted_row.append(row_list[j])
            else:
                weighted_row.append((row_list[j] - mean_list[j])/std_list[j])
        weighted_df_row = pd.DataFrame([weighted_row], columns=df.columns)
        frames.append(weighted_df_row)
    weighted_df = pd.concat(frames)
    weighted_df.index = df.index
    return weighted_df

def ready_dataset_for_modeling(train_df, test_df) -> list:
    weighted_train_df = convert_df_to_weighted_df(train_df)
    weighted_test_df = convert_df_to_weighted_df(test_df)
    
    train_dataset = weighted_train_df.to_numpy().tolist()
    test_dataset = weighted_test_df.to_numpy().tolist()
    
    combine = [train_dataset, test_dataset]
    return combine

In [15]:
model_ready_combine = ready_dataset_for_modeling(train_df_1, test_df_1)
train_dataset_1 = model_ready_combine[0]
test_dataset_1 = model_ready_combine[1]

In [16]:
# K NEAREST NEIGHBORS MODEL FUNCTIONS

def euclidean_distance(row1, row2):
    distance = 0.0
    for i in range(1, len(row1)-1):
        distance += (row1[i] - row2[i])**2
    return math.sqrt(distance)

def get_neighbors(train, test_row, num_neighbors):
    distances = list()
    for i in range(0, len(train)-1):
        ID = train[i][0]
        train_row = train[i]
        dist = euclidean_distance(test_row, train_row)
        distances.append((train_row, dist, ID))
    distances.sort(key=lambda tup: tup[1])
    neighbors = list()
    for i in range(num_neighbors):
        neighbors.append((distances[i][2], round(distances[i][1], 2)))
    return neighbors

def prediction_data(train_dataset, test_dataset, test_row_index, reference_df) -> 'DataFrame':
    neighbors = get_neighbors(train_dataset, test_dataset[test_row_index], NUM_NEIGHBORS)
    
    ID = test_dataset[test_row_index][0]
    frames = []
    id_col = [ID]
    distance_col = [0]
    
    for neighbor in neighbors:
        id_col.append(neighbor[0])
        distance_col.append(neighbor[1])
        frames.append(pd.DataFrame([list(reference_df.loc[int(neighbor[0])])], columns=reference_df.columns))
        
    big_df = pd.concat(frames)
    big_df = pd.DataFrame([reference_df.loc[ID]], columns=reference_df.columns).append(big_df, ignore_index=True)
    big_df.insert(1, 'Unit', big_df['Position Drafted'].map(UNIT_DICT), True)
    big_df.insert(1, 'Distance', distance_col, True)
    
    return big_df

def unit_prediction(train_dataset, test_dataset, test_row_index, reference_df):
    predict_dict = {}
    neighbors = get_neighbors(train_dataset, test_dataset[test_row_index], NUM_NEIGHBORS)
    ID = test_dataset[test_row_index][0]
    
    sum = 0
    for neighbor in neighbors:
        position = list(reference_df.loc[int(neighbor[0])])[1]
        for i in UNIT_DICT.keys():
            if position == i:
                unit = UNIT_DICT[position]
        if unit not in predict_dict:
            predict_dict[unit] = 0
        predict_dict[unit] += 1
        sum += 1
    
    percent_dict = {'OFF':0,'DEF':0,'Predicted':'NA','Actual':'NA'}
    for i in predict_dict:
        percent_dict[i] = round(predict_dict[i]/sum, 4)*100
        
    if percent_dict['OFF'] > percent_dict['DEF']:
        percent_dict['Predicted'] = 'OFF'
    else:
        percent_dict['Predicted'] = 'DEF'
        
    try:
        percent_dict['Actual'] = UNIT_DICT[list(reference_df.loc[ID])[1]]
    except:
        percent_dict['Actual'] = 'NA'
        
    return percent_dict

In [17]:
# Test Closest Neighbors for Number 1 Pick of the 2020 NFL DRAFT
# In other words, out of the past 20 years, which Team Stats are the closest to the Bengals 2019 Season, 
# and what unit did That Team draft their following year?
PICK_NUM = 1
test = prediction_data(train_dataset_1, test_dataset_1, PICK_NUM-1, reference)
test

Unnamed: 0,ID,Distance,Unit,Position Drafted,Team,Draft Year,Pick Number,Team PF,Team TO,Team Att,...,Opp. Y/A,Opp. 1stD.2,Off. Rank PF,Off. Rank Yds.1,Off. Rank TO,Off. Rank Att,Def. Rank PF,Def. Rank Yds.1,Def. Rank TO,Def. Rank Att
0,0,0.0,OFF,QB,Bengals,2020,1,279.0,30.0,616.0,...,4.7,124.0,30.0,19.0,27.0,6.0,25.0,21.0,28.0,2.0
1,576,3.27,OFF,T,Bills,2002,4,265.0,33.0,557.0,...,4.4,122.0,27.0,12.0,19.0,7.0,29.0,13.0,30.0,2.0
2,224,3.39,OFF,T,Jaguars,2013,2,255.0,26.0,586.0,...,4.1,133.0,30.0,21.0,17.0,11.0,29.0,22.0,18.0,9.0
3,417,3.44,DEF,DE,Buccaneers,2007,4,211.0,32.0,535.0,...,3.9,108.0,31.0,26.0,23.0,11.0,21.0,19.0,30.0,6.0
4,478,3.45,OFF,QB,49ers,2005,1,259.0,40.0,561.0,...,4.0,121.0,30.0,20.0,30.0,6.0,32.0,19.0,27.0,8.0
5,255,3.46,OFF,QB,Colts,2012,1,243.0,29.0,534.0,...,4.3,136.0,28.0,27.0,23.0,21.0,28.0,15.0,30.0,7.0


In [18]:
# What percentage of the k-nearest neighbors drafted OFF vs. DEF, and what was the actual drafted for the test?
unit_prediction(train_dataset_1, test_dataset_1, PICK_NUM-1, reference)

{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'OFF'}

In [19]:
# MODEL TESTING FUNCTIONS

def test_unit_prediction(train_dataset, test_dataset, reference_df):
    correct_predictions = 0
    eighty_plus_percent_correct = 0
    total_eighty = 0
    print(list(reference_df.loc[test_dataset[0][0]])[3])
    
    length = len(test_dataset)
    for i in range(length):
        predict_dict = unit_prediction(train_dataset, test_dataset, i, reference_df)
        if predict_dict['Actual'] == predict_dict['Predicted']:
            correct_predictions += 1
            if predict_dict[predict_dict['Predicted']] >= 80:
                eighty_plus_percent_correct += 1
        if predict_dict[predict_dict['Predicted']] >= 80:
            total_eighty += 1
        print(predict_dict, 'Total ' + str(correct_predictions)+'/'+str(i+1))
        
    correct_pred_percent = round(correct_predictions/length, 4)
    eighty_plus_percent_accuracy = round(eighty_plus_percent_correct / total_eighty, 4)
    
    print()
    print('Correct Predictions %: ', correct_pred_percent)
    print('Correct 80+% Predictions %: ', eighty_plus_percent_accuracy)
    print()
    
    return (correct_pred_percent, eighty_plus_percent_accuracy)

def test_model_across_past_years(train_df, test_df, reference_df, years):
    total_correct = 0
    total_eighty_correct = 0
    train = train_df.copy(deep=True)
    test = test_df.copy(deep=True)
    for i in range(years):
        model_ready_combine = ready_dataset_for_modeling(train, test)
        train_dataset = model_ready_combine[0]
        test_dataset = model_ready_combine[1]
        predict = test_unit_prediction(train_dataset, test_dataset, reference_df)
        total_correct += predict[0]
        total_eighty_correct += predict[1]
        test = train.iloc[:32]
        train = train.drop(train.head(32).index)
        
    average_correct = round(total_correct / years, 4)*100
    average_eighty_correct = round(total_eighty_correct / years, 4)*100
    
    print('Average Correct Predictions %: ', average_correct)
    print('Average Correct 80+% Predictions %: ', average_eighty_correct)
    return (average_correct, average_eighty_correct)

In [20]:
# Out of 32 Picks for the 2020 NFL Draft, what percentage of them are correctly predicted 
# using data from the 5 Nearest Neighbors?
test_unit_prediction(train_dataset_1, test_dataset_1, reference)

2020
{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 1/1
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'DEF'} Total 2/2
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'DEF'} Total 3/3
{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 4/4
{'OFF': 100.0, 'DEF': 0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 5/5
{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 6/6
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'DEF'} Total 7/7
{'OFF': 20.0, 'DEF': 80.0, 'Predicted': 'DEF', 'Actual': 'DEF'} Total 8/8
{'OFF': 20.0, 'DEF': 80.0, 'Predicted': 'DEF', 'Actual': 'DEF'} Total 9/9
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'OFF'} Total 9/10
{'OFF': 60.0, 'DEF': 40.0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 10/11
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'OFF'} Total 10/12
{'OFF': 60.0, 'DEF': 40.0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 11/13
{'OFF': 40.0, 'DEF': 60.0, '

(0.6875, 0.8125)

In [21]:
# Now we can begin testing the model for drafts before the 2020 draft
# For example, we will test the average scores of the model by also predicting the 2019 Draft using draft data from 2001-2018
# And to test the 2018 Draft, we will use draft data from 2001-2017
test_model_across_past_years(train_df_1, test_df_1, reference, 2)

2020
{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 1/1
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'DEF'} Total 2/2
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'DEF'} Total 3/3
{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 4/4
{'OFF': 100.0, 'DEF': 0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 5/5
{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 6/6
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'DEF'} Total 7/7
{'OFF': 20.0, 'DEF': 80.0, 'Predicted': 'DEF', 'Actual': 'DEF'} Total 8/8
{'OFF': 20.0, 'DEF': 80.0, 'Predicted': 'DEF', 'Actual': 'DEF'} Total 9/9
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'OFF'} Total 9/10
{'OFF': 60.0, 'DEF': 40.0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 10/11
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'OFF'} Total 10/12
{'OFF': 60.0, 'DEF': 40.0, 'Predicted': 'OFF', 'Actual': 'OFF'} Total 11/13
{'OFF': 40.0, 'DEF': 60.0, '

(56.25, 69.78999999999999)

In [22]:
# Based off of the 2020 Data we have around a 70% success rate on predicting 
# whether a team will draft an Offensive or Defensive Player in the 1st Round of the NFL Draft
# Now we will use the model for each of the team's stats in 2020 to predict the results of the 2021 NFL Draft
frames = []
for i in range(len(NEXT_DRAFT_ORDER)):
    url = get_team_stats_by_year_url(NEXT_DRAFT_ORDER[i], 2020)
    df = get_team_stats(url)
    row_df = convert_team_stats_to_one_row(df)
    final_row_df = make_team_stats_one_row(row_df, CURRENT_YEAR+1, i+1, NEXT_DRAFT_ORDER[i], None)
    frames.append(final_row_df)
test_df_2 = pd.concat(frames)
test_df_2.to_csv('Raw Data For ' + str(CURRENT_YEAR+1) + ' Draft.csv', encoding = 'utf-8-sig', index=False)
test_df_2 = pd.read_csv('Raw Data For ' + str(CURRENT_YEAR+1) + ' Draft.csv', encoding = 'utf-8')
test_df_2 = clean_raw_df(test_df_2, False, True)
test_df_2

Scraping https://www.pro-football-reference.com/teams/jax/2020.htm
Scraping https://www.pro-football-reference.com/teams/nyj/2020.htm
Scraping https://www.pro-football-reference.com/teams/mia/2020.htm
Scraping https://www.pro-football-reference.com/teams/atl/2020.htm
Scraping https://www.pro-football-reference.com/teams/cin/2020.htm
Scraping https://www.pro-football-reference.com/teams/phi/2020.htm
Scraping https://www.pro-football-reference.com/teams/det/2020.htm
Scraping https://www.pro-football-reference.com/teams/nyg/2020.htm
Scraping https://www.pro-football-reference.com/teams/car/2020.htm
Scraping https://www.pro-football-reference.com/teams/den/2020.htm
Scraping https://www.pro-football-reference.com/teams/dal/2020.htm
Scraping https://www.pro-football-reference.com/teams/sdg/2020.htm
Scraping https://www.pro-football-reference.com/teams/min/2020.htm
Scraping https://www.pro-football-reference.com/teams/nwe/2020.htm
Scraping https://www.pro-football-reference.com/teams/sfo/2020

Unnamed: 0,ID,Position Drafted,Team,Draft Year,Pick Number,Team PF,Team TO,Team Att,Team Yds.1,Team TD,...,Opp. Y/A,Opp. 1stD.2,Off. Rank PF,Off. Rank Yds.1,Off. Rank TO,Off. Rank Att,Def. Rank PF,Def. Rank Yds.1,Def. Rank TO,Def. Rank Att
0,0,,Jaguars,2021,1,292.0,24.0,574.0,3469.0,23.0,...,4.5,128.0,29.0,20.0,27.0,7.0,31.0,27.0,26.0,8.0
1,1,,Jets,2021,2,229.0,17.0,465.0,2557.0,15.0,...,3.9,93.0,32.0,32.0,11.0,29.0,27.0,29.0,21.0,28.0
2,2,,Dolphins,2021,3,378.0,16.0,500.0,3352.0,23.0,...,4.6,109.0,15.0,25.0,5.0,24.0,1.0,19.0,1.0,9.0
3,3,,Falcons,2021,4,369.0,16.0,584.0,4105.0,25.0,...,4.4,92.0,17.0,5.0,5.0,6.0,19.0,31.0,14.0,30.0
4,4,,Bengals,2021,5,308.0,22.0,560.0,3400.0,19.0,...,4.8,107.0,27.0,23.0,23.0,11.0,20.0,23.0,26.0,18.0
5,5,,Eagles,2021,6,320.0,26.0,565.0,3229.0,22.0,...,4.2,92.0,25.0,27.0,30.0,10.0,22.0,20.0,24.0,5.0
6,6,,Lions,2021,7,342.0,19.0,551.0,3816.0,24.0,...,4.5,119.0,24.0,11.0,17.0,14.0,32.0,30.0,30.0,14.0
7,7,,Giants,2021,8,257.0,20.0,492.0,2815.0,10.0,...,4.1,107.0,31.0,29.0,19.0,25.0,10.0,16.0,9.0,21.0
8,8,,Panthers,2021,9,343.0,16.0,513.0,3642.0,16.0,...,4.7,105.0,23.0,17.0,5.0,20.0,18.0,17.0,5.0,26.0
9,9,,Broncos,2021,10,292.0,32.0,515.0,3127.0,19.0,...,4.8,98.0,29.0,28.0,32.0,19.0,25.0,12.0,30.0,22.0


In [23]:
train_df_2 = pd.read_csv('Clean Reference For Last ' + str(YEARS) + ' Drafts.csv', encoding = 'utf-8')
train_df_2['ID'] = train_df_2['ID'] + 32
train_df_2

Unnamed: 0,ID,Position Drafted,Team,Draft Year,Pick Number,Team PF,Team TO,Team Att,Team Yds.1,Team TD,...,Opp. Y/A,Opp. 1stD.2,Off. Rank PF,Off. Rank Yds.1,Off. Rank TO,Off. Rank Att,Def. Rank PF,Def. Rank Yds.1,Def. Rank TO,Def. Rank Att
0,32,QB,Bengals,2020,1,279.0,30.0,616.0,3652.0,18.0,...,4.7,124.0,30.0,19.0,27.0,6.0,25.0,21.0,28.0,2.0
1,33,DE,Redskins,2020,2,266.0,21.0,479.0,2812.0,18.0,...,4.7,120.0,32.0,32.0,16.0,28.0,27.0,18.0,15.0,11.0
2,34,DB,Lions,2020,3,341.0,23.0,571.0,3900.0,28.0,...,4.1,94.0,18.0,10.0,19.0,17.0,26.0,32.0,24.0,31.0
3,35,T,Giants,2020,4,341.0,33.0,607.0,3731.0,30.0,...,3.9,100.0,18.0,18.0,30.0,9.0,30.0,28.0,28.0,17.0
4,36,QB,Dolphins,2020,5,306.0,26.0,615.0,3804.0,22.0,...,4.5,113.0,25.0,12.0,25.0,7.0,32.0,26.0,28.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631,663,RB,Vikings,2001,27,397.0,28.0,495.0,3832.0,33.0,...,4.5,110.0,5.0,7.0,13.0,23.0,24.0,28.0,31.0,28.0
632,664,DB,Raiders,2001,28,479.0,20.0,475.0,3306.0,28.0,...,4.0,85.0,3.0,15.0,2.0,25.0,9.0,25.0,7.0,30.0
633,665,DE,Rams,2001,29,540.0,35.0,587.0,5232.0,37.0,...,4.4,95.0,1.0,1.0,24.0,3.0,31.0,27.0,20.0,17.0
634,666,WR,Colts,2001,30,429.0,29.0,571.0,4282.0,33.0,...,4.3,109.0,4.0,2.0,15.0,8.0,15.0,18.0,25.0,14.0


In [24]:
reference_df2 = pd.concat([test_df_2, train_df_2])
reference_df2 = reference_df2.reset_index(drop=True)
reference_df2

Unnamed: 0,ID,Position Drafted,Team,Draft Year,Pick Number,Team PF,Team TO,Team Att,Team Yds.1,Team TD,...,Opp. Y/A,Opp. 1stD.2,Off. Rank PF,Off. Rank Yds.1,Off. Rank TO,Off. Rank Att,Def. Rank PF,Def. Rank Yds.1,Def. Rank TO,Def. Rank Att
0,0,,Jaguars,2021,1,292.0,24.0,574.0,3469.0,23.0,...,4.5,128.0,29.0,20.0,27.0,7.0,31.0,27.0,26.0,8.0
1,1,,Jets,2021,2,229.0,17.0,465.0,2557.0,15.0,...,3.9,93.0,32.0,32.0,11.0,29.0,27.0,29.0,21.0,28.0
2,2,,Dolphins,2021,3,378.0,16.0,500.0,3352.0,23.0,...,4.6,109.0,15.0,25.0,5.0,24.0,1.0,19.0,1.0,9.0
3,3,,Falcons,2021,4,369.0,16.0,584.0,4105.0,25.0,...,4.4,92.0,17.0,5.0,5.0,6.0,19.0,31.0,14.0,30.0
4,4,,Bengals,2021,5,308.0,22.0,560.0,3400.0,19.0,...,4.8,107.0,27.0,23.0,23.0,11.0,20.0,23.0,26.0,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663,663,RB,Vikings,2001,27,397.0,28.0,495.0,3832.0,33.0,...,4.5,110.0,5.0,7.0,13.0,23.0,24.0,28.0,31.0,28.0
664,664,DB,Raiders,2001,28,479.0,20.0,475.0,3306.0,28.0,...,4.0,85.0,3.0,15.0,2.0,25.0,9.0,25.0,7.0,30.0
665,665,DE,Rams,2001,29,540.0,35.0,587.0,5232.0,37.0,...,4.4,95.0,1.0,1.0,24.0,3.0,31.0,27.0,20.0,17.0
666,666,WR,Colts,2001,30,429.0,29.0,571.0,4282.0,33.0,...,4.3,109.0,4.0,2.0,15.0,8.0,15.0,18.0,25.0,14.0


In [25]:
train_df_2 = train_df_2.drop(columns=['Position Drafted', 'Team', 'Draft Year'])
test_df_2  = test_df_2.drop(columns=['Position Drafted', 'Team', 'Draft Year'])
train_df_2.to_csv('Train Data For ' + str(CURRENT_YEAR+1) + ' Draft.csv', encoding = 'utf-8-sig', index=False)
test_df_2.to_csv('Test Data For ' + str(CURRENT_YEAR+1) + ' Draft.csv', encoding = 'utf-8-sig', index=False)
train_df_2

Unnamed: 0,ID,Pick Number,Team PF,Team TO,Team Att,Team Yds.1,Team TD,Team Int,Team Att.1,Team Yds.2,...,Opp. Y/A,Opp. 1stD.2,Off. Rank PF,Off. Rank Yds.1,Off. Rank TO,Off. Rank Att,Def. Rank PF,Def. Rank Yds.1,Def. Rank TO,Def. Rank Att
0,32,1,279.0,30.0,616.0,3652.0,18.0,16.0,385.0,1517.0,...,4.7,124.0,30.0,19.0,27.0,6.0,25.0,21.0,28.0,2.0
1,33,2,266.0,21.0,479.0,2812.0,18.0,13.0,356.0,1583.0,...,4.7,120.0,32.0,32.0,16.0,28.0,27.0,18.0,15.0,11.0
2,34,3,341.0,23.0,571.0,3900.0,28.0,15.0,407.0,1649.0,...,4.1,94.0,18.0,10.0,19.0,17.0,26.0,32.0,24.0,31.0
3,35,4,341.0,33.0,607.0,3731.0,30.0,17.0,362.0,1685.0,...,3.9,100.0,18.0,18.0,30.0,9.0,30.0,28.0,28.0,17.0
4,36,5,306.0,26.0,615.0,3804.0,22.0,18.0,349.0,1156.0,...,4.5,113.0,25.0,12.0,25.0,7.0,32.0,26.0,28.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631,663,27,397.0,28.0,495.0,3832.0,33.0,18.0,428.0,2129.0,...,4.5,110.0,5.0,7.0,13.0,23.0,24.0,28.0,31.0,28.0
632,664,28,479.0,20.0,475.0,3306.0,28.0,11.0,520.0,2470.0,...,4.0,85.0,3.0,15.0,2.0,25.0,9.0,25.0,7.0,30.0
633,665,29,540.0,35.0,587.0,5232.0,37.0,23.0,383.0,1843.0,...,4.4,95.0,1.0,1.0,24.0,3.0,31.0,27.0,20.0,17.0
634,666,30,429.0,29.0,571.0,4282.0,33.0,15.0,435.0,1859.0,...,4.3,109.0,4.0,2.0,15.0,8.0,15.0,18.0,25.0,14.0


In [26]:
model_ready_combine2 = ready_dataset_for_modeling(train_df_2, test_df_2)
train_dataset_2 = model_ready_combine2[0]
test_dataset_2 = model_ready_combine2[1]

In [27]:
PICK_NUM2 = 12
test2 = prediction_data(train_dataset_2, test_dataset_2, PICK_NUM2-1, reference_df2)
test2

Unnamed: 0,ID,Distance,Unit,Position Drafted,Team,Draft Year,Pick Number,Team PF,Team TO,Team Att,...,Opp. Y/A,Opp. 1stD.2,Off. Rank PF,Off. Rank Yds.1,Off. Rank TO,Off. Rank Att,Def. Rank PF,Def. Rank Yds.1,Def. Rank TO,Def. Rank Att
0,11,0.0,,,Chargers,2021,12,346.0,16.0,596.0,...,4.6,108.0,21.0,6.0,5.0,3.0,24.0,9.0,23.0,11.0
1,331,3.34,DEF,DT,Lions,2011,13,362.0,25.0,633.0,...,4.5,98.0,15.0,12.0,12.0,3.0,19.0,16.0,11.0,10.0
2,162,3.48,DEF,DE,Chargers,2016,3,320.0,24.0,667.0,...,4.8,101.0,26.0,4.0,17.0,2.0,21.0,14.0,24.0,3.0
3,237,3.62,DEF,LB,Steelers,2014,15,379.0,24.0,586.0,...,4.3,106.0,16.0,12.0,14.0,13.0,14.0,9.0,27.0,18.0
4,266,3.64,DEF,DB,Raiders,2013,12,290.0,26.0,629.0,...,4.3,95.0,26.0,8.0,17.0,5.0,28.0,20.0,26.0,7.0
5,586,3.74,DEF,DT,Patriots,2003,13,381.0,24.0,605.0,...,4.7,129.0,10.0,12.0,7.0,4.0,17.0,11.0,14.0,12.0


In [28]:
unit_prediction(train_dataset_2, test_dataset_2, PICK_NUM2-1, reference_df2)

{'OFF': 0, 'DEF': 100.0, 'Predicted': 'DEF', 'Actual': 'NA'}

In [29]:
test_unit_prediction(train_dataset_2, test_dataset_2, reference_df2)

2021
{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'NA'} Total 0/1
{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'NA'} Total 0/2
{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'NA'} Total 0/3
{'OFF': 20.0, 'DEF': 80.0, 'Predicted': 'DEF', 'Actual': 'NA'} Total 0/4
{'OFF': 80.0, 'DEF': 20.0, 'Predicted': 'OFF', 'Actual': 'NA'} Total 0/5
{'OFF': 60.0, 'DEF': 40.0, 'Predicted': 'OFF', 'Actual': 'NA'} Total 0/6
{'OFF': 60.0, 'DEF': 40.0, 'Predicted': 'OFF', 'Actual': 'NA'} Total 0/7
{'OFF': 60.0, 'DEF': 40.0, 'Predicted': 'OFF', 'Actual': 'NA'} Total 0/8
{'OFF': 40.0, 'DEF': 60.0, 'Predicted': 'DEF', 'Actual': 'NA'} Total 0/9
{'OFF': 100.0, 'DEF': 0, 'Predicted': 'OFF', 'Actual': 'NA'} Total 0/10
{'OFF': 20.0, 'DEF': 80.0, 'Predicted': 'DEF', 'Actual': 'NA'} Total 0/11
{'OFF': 0, 'DEF': 100.0, 'Predicted': 'DEF', 'Actual': 'NA'} Total 0/12
{'OFF': 20.0, 'DEF': 80.0, 'Predicted': 'DEF', 'Actual': 'NA'} Total 0/13
{'OFF': 60.0, 'DEF': 40.0, 'Predicted': 'OFF',

(0.0, 0.0)