In [16]:
import pandas as pd
import numpy as np
from os import path

# Let pandas display up to 300 rows before truncating a frame.
pd.options.display.max_rows = 300

# Root folder of the raw csvs; read_raw_data joins the per-season file names onto it.
DATA_FOLDER = '~/Python-Projects/UD-Draft-Model/Repo-Work/UD-Draft-Model/data/csvs'

def read_raw_data(folder_path):
    """
    Reads in the raw csvs and combines them into one df.

    Files read, relative to ``folder_path``:
      * ``2021/raw_drafts.csv``       - used as is.
      * ``2022/df_drafts.csv``        - the 2022 picks.
      * ``2022/df_league_info.csv``   - only ``id``, ``source`` and ``title``
        are kept; they are renamed to ``draft_id``, ``draft_source`` and
        ``draft_title`` and left-joined onto the 2022 picks by ``draft_id``.

    Args:
        folder_path: Folder containing the ``2021`` and ``2022`` sub-folders.

    Returns:
        The 2021 rows followed by the 2022 rows. Each season keeps its own
        row labels, so the index of the result can contain repeated values.

    TODO: Might want to make more dynamic at some point.
    """

    df_raw_2021 = pd.read_csv(path.join(folder_path, '2021/raw_drafts.csv'))

    df_drafts_2022 = pd.read_csv(path.join(folder_path, '2022/df_drafts.csv'))
    df_league_info_2022 = pd.read_csv(path.join(folder_path, '2022/df_league_info.csv'))

    # rename() returns a new frame here; renaming a column selection in place
    # is a classic source of SettingWithCopyWarning.
    rename_vars = {'id': 'draft_id', 'source': 'draft_source', 'title': 'draft_title'}
    df_league_info_2022 = df_league_info_2022[['id', 'source', 'title']].rename(columns=rename_vars)

    # Left join so every 2022 pick is kept even when its draft has no league info.
    df_raw_2022 = pd.merge(df_drafts_2022, df_league_info_2022, how='left', on='draft_id')

    return pd.concat([df_raw_2021, df_raw_2022])


def update_dtypes(df: pd.DataFrame, null_adp: float = 216) -> pd.DataFrame:
    """
    Updates columns to more appropriate dtypes.

    * ``projection_adp``: the ``'-'`` placeholder is replaced by ``null_adp``
      and the column is cast to float.
    * ``created_at``: parsed to datetime so it can be used as a filter.

    Args:
        df: Frame with ``projection_adp`` and ``created_at`` columns.
        null_adp: Value substituted for a ``'-'`` ADP. The default of 216 is
            presumably the last pick of a 12-team, 18-round draft - confirm.

    Returns:
        The same ``df`` object. Both columns are overwritten in place, so the
        caller's frame is updated as well; the notebook pipeline reads the
        original variable again after this call.
    """

    # Replace null adps and update to float
    df['projection_adp'] = np.where(df['projection_adp'] == '-', null_adp, df['projection_adp'])
    df['projection_adp'] = df['projection_adp'].astype('float')

    # Update created_at to datetime to use as possible filter.
    # `infer_datetime_format` is deprecated in pandas 2.x, so it is not passed.
    df['created_at'] = pd.to_datetime(df['created_at'])

    return df


def drafts_w_player_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Filters out drafts which do not have player attributes (team, position, etc.)
    as these will likely serve as features for the model.

    A pick is treated as missing its player attributes when ``first_name`` is
    null, and a single such pick removes every row sharing its ``draft_id``.

    Args:
        df: Pick-level frame with at least ``draft_id`` and ``first_name``.

    Returns:
        A new frame with only the complete drafts. The input frame is not
        modified. Rows are labelled by their position in the input (0..n-1),
        so the labels of removed rows are simply missing.
    """

    # Use the frame that was passed in rather than a notebook global, and give
    # it fresh 0..n-1 labels.
    df = df.reset_index(drop=True)

    # Ids of every draft with at least one pick lacking a first name.
    null_draft_ids = df.loc[df['first_name'].isnull(), 'draft_id'].unique()

    return df.loc[~df['draft_id'].isin(null_draft_ids)].copy()


def _add_draft_dt(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds ``draft_dt``, the earliest ``created_at`` among each draft's picks.

    Returns a new frame (left merge on ``draft_id``); the input is untouched.
    """
    # Sorting by draft then timestamp puts each draft's earliest pick first,
    # so keeping the first row per draft yields that draft's start time.
    draft_starts = (
        df[['draft_id', 'created_at']]
        .sort_values(by=['draft_id', 'created_at'])
        .drop_duplicates(subset='draft_id', keep='first')
        .rename(columns={'created_at': 'draft_dt'})
    )

    return pd.merge(df, draft_starts, on='draft_id', how='left')


def add_draft_attrs(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds several draft level attributes.

    Columns added:
      * ``num_teams``  - number of distinct ``draft_entry_id`` values in the draft.
      * ``round``      - 1-based round of the pick, from ``number`` and ``num_teams``.
      * ``round_pick`` - 1-based position of the pick within its round.
      * ``draft_dt``   - earliest ``created_at`` of the draft.
      * ``draft_year`` - year of ``draft_dt``, so it is the same for every pick
        of a draft even when the picks cross a year boundary.

    Args:
        df: Pick-level frame with ``draft_id``, ``draft_entry_id``, ``number``
            and a datetime ``created_at`` column.

    Returns:
        A new frame with the columns above appended; the input is not modified.
    """

    # Adds number of teams by draft
    by_vars = ['draft_id', 'draft_entry_id']
    draft_teams = df[by_vars].drop_duplicates(subset=by_vars)

    num_teams = draft_teams.groupby('draft_id').size().to_frame('num_teams')

    df = pd.merge(df, num_teams, on='draft_id', how='left')

    # Adds round and pick of the round by draft
    df['round'] = ((df['number'] - 1) / df['num_teams']).astype('int') + 1
    df['round_pick'] = df['number'] - ((df['round'] - 1) * df['num_teams'])

    # Add datetime of draft and year. The year comes from the draft-level
    # timestamp, not the per-pick one, so it is a true draft attribute.
    df = _add_draft_dt(df)
    df['draft_year'] = df['draft_dt'].dt.year

    return df


def add_model_vars(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds additional variables to test in the model.

    Currently one column, ``actual_proj_adp_diff``: ``projection_adp`` minus
    the pick ``number``. The column is written onto ``df`` itself, and that
    same object is returned.
    """
    adp_minus_pick = df['projection_adp'].sub(df['number'])
    df['actual_proj_adp_diff'] = adp_minus_pick

    return df

# Preparation pipeline. Every stage keeps its own name so that later cells can
# inspect the intermediate frames.
df_raw_all = read_raw_data(DATA_FOLDER)
df_updated_types = df_raw_all.pipe(update_dtypes)
df_complete_players = df_updated_types.pipe(drafts_w_player_data)
df_draft_attrs = df_complete_players.pipe(add_draft_attrs)

df_final = df_draft_attrs.pipe(add_model_vars)

df_final.dtypes


id                              object
appearance_id                   object
created_at              datetime64[ns]
draft_entry_id                  object
number                           int64
pick_slot_id                    object
points                         float64
projection_adp                 float64
projection_points              float64
swapped                           bool
draft_id                        object
player_id                       object
position                        object
team_name                       object
first_name                      object
last_name                       object
draft_source                    object
draft_title                     object
num_teams                        int64
round                            int64
round_pick                       int64
draft_dt                datetime64[ns]
draft_year                       int64
actual_proj_adp_diff           float64
dtype: object

In [22]:
# Show every pick of the draft whose draft_dt is 2022-06-03 23:54:57.
df = df_final.copy()
df.loc[lambda picks: picks['draft_dt'] == '2022-06-03 23:54:57']

Unnamed: 0,id,appearance_id,created_at,draft_entry_id,number,pick_slot_id,points,projection_adp,projection_points,swapped,...,first_name,last_name,draft_source,draft_title,num_teams,round,round_pick,draft_dt,draft_year,actual_proj_adp_diff
26316,2c41fea7-815c-401f-bbc5-6cc58cc514de,fa2fcc47-5a09-458e-82b6-656b030ad88a,2022-06-03 23:54:57,68863415-6978-41ee-8bd0-9fe256ccd1d9,1,fce28407-d1df-57fc-bfce-28922a91222d,32.3,1.1,383.1,False,...,Jonathan,Taylor,tournament,The Puppy,12,1,1,2022-06-03 23:54:57,2022,0.1
26317,a6e9babf-d1e6-4659-9c3c-77dc966582eb,715fd9c0-eb4b-494a-ac9f-d5c53579db72,2022-06-03 23:55:04,e4cd79d1-453b-4988-9658-a190c11e5935,2,fce28407-d1df-57fc-bfce-28922a91222d,62.3,3.1,270.3,False,...,Christian,McCaffrey,tournament,The Puppy,12,1,2,2022-06-03 23:54:57,2022,1.1
26318,7b937de2-5cf7-483d-8f8f-195da56f05d2,70580da9-7858-462f-a1fd-4ff47a7b1172,2022-06-03 23:55:08,3b6182cd-bb77-4c07-bb2a-28f196f8afc9,3,f307af80-184f-5828-8105-4083bec970d6,85.2,2.5,330.1,False,...,Cooper,Kupp,tournament,The Puppy,12,1,3,2022-06-03 23:54:57,2022,-0.5
26319,470e9f42-2ac2-4f13-873d-f4d047b403d7,62154a84-9ad4-41f8-b3f3-23801013ebc8,2022-06-03 23:55:13,a8c7ac20-bce3-460a-be66-3b786f852025,4,f307af80-184f-5828-8105-4083bec970d6,60.9,4.1,266.5,False,...,Justin,Jefferson,tournament,The Puppy,12,1,4,2022-06-03 23:54:57,2022,0.1
26320,70ed0887-8450-4d45-bb8d-360cf5842975,6022e5bc-a85c-471a-b78b-a66066f24ce4,2022-06-03 23:55:44,5dae1ebf-3525-465a-81c1-2e5f24d0daf7,5,f307af80-184f-5828-8105-4083bec970d6,52.0,5.1,276.2,False,...,Ja'Marr,Chase,tournament,The Puppy,12,1,5,2022-06-03 23:54:57,2022,0.1
26321,93430f99-8da9-4a33-af45-0b3f0d5a76ad,40f907c8-711d-4f93-93f7-e268d966fe42,2022-06-03 23:56:15,3bf2de5a-1352-4908-8ecd-0de932f8c933,6,fce28407-d1df-57fc-bfce-28922a91222d,64.3,6.6,260.0,False,...,Austin,Ekeler,tournament,The Puppy,12,1,6,2022-06-03 23:54:57,2022,0.6
26322,ad9ab67c-674f-418e-9e45-e54e6c988f03,69687967-146f-40b7-9dc9-851995c3f4ac,2022-06-03 23:56:22,8206b007-f1d9-4ecc-9cf0-945b6d371974,7,fce28407-d1df-57fc-bfce-28922a91222d,53.5,7.5,318.9,False,...,Derrick,Henry,tournament,The Puppy,12,1,7,2022-06-03 23:54:57,2022,0.5
26323,0b4a66d7-0513-47df-bb44-dc45138aaba0,ca6b7b2f-cdba-40cd-86ec-851629b8890b,2022-06-03 23:56:35,e0e4d884-8e46-4c62-be1d-7a688b44c94f,8,f307af80-184f-5828-8105-4083bec970d6,80.1,8.8,235.9,False,...,Stefon,Diggs,tournament,The Puppy,12,1,8,2022-06-03 23:54:57,2022,0.8
26324,dd779de1-67a5-4a42-8d4c-dacab68e4f00,f015ede6-d975-495c-ac72-4e01ec70fee8,2022-06-03 23:56:42,dd77cfc6-8609-443d-bcf8-b8d0675d35ec,9,fce28407-d1df-57fc-bfce-28922a91222d,40.0,8.6,221.6,False,...,Najee,Harris,tournament,The Puppy,12,1,9,2022-06-03 23:54:57,2022,-0.4
26325,d71c1b68-b51c-4be7-b02c-7e50951cbdda,f40693b0-68e6-4360-b90c-b55bc425147b,2022-06-03 23:56:54,bb6cc2c5-34ba-47b7-9c79-2899a0e2b5b7,10,fce28407-d1df-57fc-bfce-28922a91222d,35.5,10.7,248.6,False,...,Dalvin,Cook,tournament,The Puppy,12,1,10,2022-06-03 23:54:57,2022,0.7


In [21]:
# One row per draft (first row found for each draft_id), ordered by draft_dt,
# restricted to drafts with draft_year 2022.
by_vars = ['draft_id', 'draft_dt', 'draft_year', 'draft_source']
df = (
    df_final[by_vars]
    .drop_duplicates(subset='draft_id')
    .sort_values(by='draft_dt')
    .loc[lambda drafts: drafts['draft_year'] == 2022]
)
df

Unnamed: 0,draft_id,draft_dt,draft_year,draft_source
33660,133d89ab-ba4f-4230-9148-396bee781f5c,2022-05-15 23:09:01,2022,sit_and_go
33444,34b0228a-98af-4b65-b675-5da3c1fff455,2022-05-16 00:16:37,2022,sit_and_go
33228,878a67c6-3a71-430e-bf7f-f7d795ac5bd4,2022-05-20 23:41:23,2022,sit_and_go
33012,56db91d7-372a-4d94-9be9-b764ec3682e6,2022-05-21 01:05:46,2022,sit_and_go
32796,60a7c668-de4b-4f31-9366-f93f61765a55,2022-05-21 22:11:14,2022,sit_and_go
32580,03c05fa1-0f9b-4390-b2e2-1822135a4791,2022-05-24 22:01:01,2022,sit_and_go
32364,5fc200d5-5f37-45e7-9cfb-0e073a9a4e8e,2022-05-26 22:04:02,2022,sit_and_go
32148,2a35ed1c-8e6a-456a-93c9-71b3d436f056,2022-05-31 20:41:46,2022,sit_and_go
26316,f8962b74-280b-43ae-9009-e3b4b4f23db5,2022-06-03 23:54:57,2022,tournament
31932,3b54e1ef-f16c-473e-867b-c048d59a8e52,2022-06-04 00:05:51,2022,sit_and_go


In [132]:
# For one specific draft, keep the first row found for each draft_entry_id.
df = (
    df_final
    .loc[lambda picks: picks['draft_id'] == 'd525469e-276a-4cf3-ad07-a268841faea3']
    .drop_duplicates(subset=['draft_entry_id'])
)

df

Unnamed: 0,id,appearance_id,created_at,draft_entry_id,number,pick_slot_id,points,projection_adp,projection_points,swapped,draft_id,player_id,position,team_name,first_name,last_name,draft_source,draft_title,actual_proj_adp_diff
0,9263abfe-1103-4385-a95b-0e89c8ffc5fe,78a5634d-93aa-4cd9-b1af-dfb829df452c,2021-08-25 01:44:33,74b3fdcc-6128-4e7a-ae1a-fda04c1859d3,1,fce28407-d1df-57fc-bfce-28922a91222d,105.5,1.0,308.3,False,d525469e-276a-4cf3-ad07-a268841faea3,cb7b1dc6-91ce-47b8-813c-07486371a922,RB,Carolina Panthers,Christian,McCaffrey,,,0.0
1,a694c83a-5e09-496d-8451-71067f425400,a971451b-f89e-498f-a6b4-060854880ed6,2021-08-25 01:45:03,9489f27e-36b8-457c-bbc7-dd7757051ca9,2,fce28407-d1df-57fc-bfce-28922a91222d,156.4,2.1,282.5,False,d525469e-276a-4cf3-ad07-a268841faea3,bcc30f17-0f7c-4772-9303-a6e4439409c7,RB,Minnesota Vikings,Dalvin,Cook,,,0.1
2,fa252280-e34a-496e-9c90-3909caba313d,30149020-3a70-47ce-a8e2-b52f0a5770fa,2021-08-25 01:45:06,b84b9608-e0c8-4d65-a4ab-7698168e4c9f,3,fce28407-d1df-57fc-bfce-28922a91222d,159.5,3.7,261.2,False,d525469e-276a-4cf3-ad07-a268841faea3,cc542526-a1a8-4945-9780-3b7228a62322,RB,New Orleans Saints,Alvin,Kamara,,,0.7
3,11e9522f-f2b6-4f15-953a-3f6548424046,53565b50-b52d-4c4f-bcf7-ee96476f4bbc,2021-08-25 01:45:26,76e8b661-e9b0-4e1d-a5b5-59184955a1bb,4,fce28407-d1df-57fc-bfce-28922a91222d,184.3,4.3,248.8,False,d525469e-276a-4cf3-ad07-a268841faea3,e9fdd9a9-7652-4f39-b790-071e2839f862,RB,Tennessee Titans,Derrick,Henry,,,0.3
4,850f1db4-09f2-41a4-80f4-9bcccfb4f74d,c5897583-8409-4f22-a2af-e0eb91deaadd,2021-08-25 01:45:32,5de796ee-9b9e-4975-bbe6-5abd444c41db,5,fce28407-d1df-57fc-bfce-28922a91222d,137.76,5.6,252.4,False,d525469e-276a-4cf3-ad07-a268841faea3,b6fc1eff-ec88-4523-9656-d9fd6b53a7ff,RB,Dallas Cowboys,Ezekiel,Elliott,,,0.6
5,d9a35d60-b4cf-432d-b24c-eb60b021511a,60fa6a9f-f875-4f7f-9bdc-8528b10fa2ec,2021-08-25 01:45:41,48c8da6f-eab3-462a-bdb1-79ead4b1c326,6,f307af80-184f-5828-8105-4083bec970d6,184.9,6.1,247.9,False,d525469e-276a-4cf3-ad07-a268841faea3,56c3ad41-32a6-413e-8cf0-50912b952f92,WR,Green Bay Packers,Davante,Adams,,,0.1
6,700e982f-6f28-4b4a-930b-0b9eaa275ce1,fbaa93f9-6961-407f-9042-5d033198e870,2021-08-25 01:45:45,c32869d5-26f3-4907-afd9-6b7d82548ba3,7,fce28407-d1df-57fc-bfce-28922a91222d,89.4,11.6,251.1,False,d525469e-276a-4cf3-ad07-a268841faea3,aa48a0ad-1d71-4d09-a86a-1e6e1987d911,RB,NY Giants,Saquon,Barkley,,,4.6
7,e58dd773-2f87-4157-835f-7238d4dfd325,89934e03-aeea-48ce-aedf-1921375c3309,2021-08-25 01:45:55,813af4bc-b1ee-488d-99f7-4a9fd83b138a,8,fce28407-d1df-57fc-bfce-28922a91222d,146.4,15.0,246.4,False,d525469e-276a-4cf3-ad07-a268841faea3,6328d647-7cbf-4947-8a78-b7a62284baa3,RB,Cleveland Browns,Nick,Chubb,,,7.0
8,c6206b48-9897-4b2d-aebe-a3a841bdcca3,e4241c8c-f93e-4cc0-9512-0ed95e81f98d,2021-08-25 01:46:02,22c11714-eb8c-4027-9823-ac3cd7c267cb,9,b98fc390-e175-576c-a198-a86bc83c3453,151.5,6.9,218.2,False,d525469e-276a-4cf3-ad07-a268841faea3,9ba8d5db-3aa3-4600-98f0-406a1e67bb22,TE,Kansas City Chiefs,Travis,Kelce,,,-2.1
9,b54ca0eb-c047-454d-b95a-c508032977cc,6f5ca433-e04e-47e9-aea3-f49aff710a42,2021-08-25 01:46:06,af7fb320-783e-480a-8f7d-4f8c74dd1ec1,10,fce28407-d1df-57fc-bfce-28922a91222d,136.9,9.2,241.2,False,d525469e-276a-4cf3-ad07-a268841faea3,daefbcd5-dfac-49ac-83d7-2a9e6ec03545,RB,Green Bay Packers,Aaron,Jones,,,-0.8


In [69]:
# All picks whose projected ADP is exactly 97.1.
df_final[df_final['projection_adp'].eq(97.1)]

Unnamed: 0,id,appearance_id,created_at,draft_entry_id,number,pick_slot_id,points,projection_adp,projection_points,swapped,...,team_name,first_name,last_name,draft_source,draft_title,num_teams,round,round_pick,draft_year,actual_proj_adp_diff
4203,a445a63b-0065-453a-be69-2171d09b48e4,a5da55f2-db18-4855-ae9b-83e3e126ca03,2021-08-04 00:14:40,1be681b4-30a6-4184-8aa2-36160ae4f22a,100,feb6064a-c137-5581-96c1-9a9d384230a6,225.14,97.1,320.3,False,...,Philadelphia Eagles,Jalen,Hurts,,,12,9,4,2021,-2.9
4412,07f661d7-0ee8-408c-af49-c1f004237342,18333356-4269-4ead-8276-c0d6c22f486b,2021-08-03 01:12:16,b2fe2cef-5428-4ac6-b1ea-5adb5b24ef24,93,b5fe78df-8a90-5320-b331-fa287e4ebfb5,127.4,97.1,123.6,False,...,Jacksonville Jaguars,James,Robinson,,,12,8,9,2021,4.1
4634,396942ba-0e11-496a-b619-567ed7114665,18333356-4269-4ead-8276-c0d6c22f486b,2021-08-03 00:29:03,4fa1e730-3584-41a1-8819-0cec046e45bc,99,eb6f649b-6c49-53fb-866b-ce512cd76d4a,119.5,97.1,123.6,False,...,Jacksonville Jaguars,James,Robinson,,,12,9,3,2021,-1.9
5939,dbb169af-9c5a-4102-9d13-ab9f5d0d1b13,e849512a-30ae-4de1-b705-177056ca3f9d,2021-07-25 20:22:53,1eeff918-d3ef-43cc-8355-2559a2d6601d,108,b98fc390-e175-576c-a198-a86bc83c3453,73.4,97.1,122.0,False,...,Los Angeles Rams,Tyler,Higbee,,,12,9,12,2021,-10.9
7211,650861d7-849b-4d8e-859c-1400de757f21,0c453993-12a1-4019-ac33-b56206e18271,2021-07-03 13:23:01,cb52d2f5-e61c-45e3-a08c-8ea8281e185d,84,feb6064a-c137-5581-96c1-9a9d384230a6,169.7,97.1,295.7,False,...,Cincinnati Bengals,Joe,Burrow,,,12,7,12,2021,13.1
7855,dd11516a-5191-4143-8b30-f28ec74ed6d8,52b3e3f7-5732-4e22-9e6f-5ba8e8ecfadf,2021-06-26 01:52:39,ae92a4ff-01d1-4df4-ab12-9efd73731f86,80,0003fd2a-1e80-5f7e-979f-62cf5b86fe05,165.3,97.1,120.0,False,...,Tampa Bay Buccaneers,Leonard,Fournette,,,12,7,8,2021,17.1
10908,ddccee8c-5696-49f6-9a83-514930216673,e849512a-30ae-4de1-b705-177056ca3f9d,2021-05-21 22:35:40,9c6581e4-4237-459e-89ab-39d16d44645f,109,b98fc390-e175-576c-a198-a86bc83c3453,73.4,97.1,106.2,False,...,Los Angeles Rams,Tyler,Higbee,,,12,10,1,2021,-11.9
11100,25a06c34-2135-49db-9849-cb85c777b505,3e20d7a1-ae60-4dc3-a02a-9dfed27a788b,2021-05-16 14:24:33,edfffbf4-5b18-4a32-ba81-eb3f0b5c5407,85,eb6f649b-6c49-53fb-866b-ce512cd76d4a,75.8,97.1,106.7,False,...,Dallas Cowboys,Tony,Pollard,,,12,8,1,2021,12.1
11560,ae9f0e57-3a46-4ee9-89ba-94c41c8c32e5,3c7c3ad9-3224-42f6-8dc1-e8b348452438,2021-05-13 23:51:02,aa4db3fb-4c86-4b15-a44e-6dcbe2f59238,113,eb6f649b-6c49-53fb-866b-ce512cd76d4a,107.38,97.1,387.8,False,...,Green Bay Packers,Aaron,Rodgers,,,12,10,5,2021,-15.9
17813,112b2bca-3919-4136-93e9-894987da83a1,e849512a-30ae-4de1-b705-177056ca3f9d,2021-07-25 16:43:50,b6e943f0-24cf-48ba-8c9c-526fd5680004,102,b98fc390-e175-576c-a198-a86bc83c3453,90.7,97.1,122.0,False,...,Los Angeles Rams,Tyler,Higbee,sit_and_go,,12,9,6,2021,-4.9


In [64]:
# Count the distinct (appearance_id, projection_adp) pairs behind each
# projection_adp value and show the 50 most frequent values.
df = (
    df_final[['appearance_id', 'projection_adp']]
    .drop_duplicates(subset=['appearance_id', 'projection_adp'])
    .groupby('projection_adp')
    .size()
    .to_frame('count')
    .reset_index()
    .sort_values(by='count', ascending=False)
    .reset_index()  # keeps the pre-sort position as an 'index' column
)

df.head(50)

Unnamed: 0,index,projection_adp,count
0,2144,215.7,35
1,2141,215.4,30
2,2142,215.5,29
3,2145,215.8,28
4,2131,214.4,25
5,2130,214.3,24
6,2146,215.9,24
7,2140,215.3,24
8,2139,215.2,24
9,2132,214.5,23


In [None]:
###########################################################################################
################################### Basic Exploration #####################################
###########################################################################################

In [45]:
# Quantiles of actual_proj_adp_diff for each draft round (1 through 18),
# one row per round with the round number as the first column.
quantile_levels = [.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99]

dfs = []
# `round_num` rather than `round`: a notebook-level loop variable called
# `round` would shadow the builtin for every later cell.
for round_num in range(1, 19):
    round_picks = df_final.loc[df_final['round'] == round_num]

    round_row = round_picks['actual_proj_adp_diff'].quantile(quantile_levels).to_frame().transpose()
    round_row['round'] = round_num

    # Move the round column to the front, ahead of the quantile columns.
    ordered_cols = ['round'] + [col for col in round_row.columns if col != 'round']

    dfs.append(round_row[ordered_cols])

df = pd.concat(dfs)

df


Unnamed: 0,round,0.01,0.05,0.25,0.5,0.75,0.95,0.99
actual_proj_adp_diff,1,-3.3,-2.0,-0.4,0.2,1.1,3.5,7.0
actual_proj_adp_diff,2,-5.1,-3.6,-1.4,0.1,1.9,4.895,9.295
actual_proj_adp_diff,3,-6.7,-4.5,-1.5,0.4,2.5,7.195,11.357
actual_proj_adp_diff,4,-9.319,-5.9,-2.175,0.5,3.2,9.495,15.614
actual_proj_adp_diff,5,-11.538,-7.7,-3.1,0.0,3.475,9.895,18.214
actual_proj_adp_diff,6,-12.938,-8.3,-2.9,0.3,4.0,11.3,22.738
actual_proj_adp_diff,7,-13.619,-9.0,-3.6,0.1,4.4,12.3,23.495
actual_proj_adp_diff,8,-14.595,-10.3,-3.7,0.3,4.7,13.5,23.7
actual_proj_adp_diff,9,-17.2,-11.195,-4.0,1.2,5.5,14.3,22.733
actual_proj_adp_diff,10,-18.7,-12.695,-5.6,-0.4,5.3,17.195,33.3


In [121]:
# Check correlations between primary modeling variables and draft pick.
# Read from df_final explicitly: the scratch name `df` is rebound by the
# exploration cells, so relying on it only works with out-of-order execution.
df_final[['number', 'projection_adp', 'actual_proj_adp_diff']].corr()

Unnamed: 0,number,projection_adp,actual_proj_adp_diff
number,1.0,0.989551,-0.024938
projection_adp,0.989551,1.0,0.119459
actual_proj_adp_diff,-0.024938,0.119459,1.0


In [70]:
# Number of drafts per draft_source among the complete drafts; drafts with a
# missing source are kept as their own group (dropna=False).
df = (
    df_complete_players[['draft_id', 'draft_source']]
    .drop_duplicates(subset='draft_id')
    .groupby('draft_source', dropna=False)
    .size()
    .to_frame('num_drafts')
    .reset_index()
)

df

Unnamed: 0,draft_source,num_drafts
0,sit_and_go,68
1,tournament,33
2,,55
