In [None]:
import datetime
import io
import logging
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from draft_kings import Client, Sport
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

In [None]:
def setup_driver():
    """Build a Chrome WebDriver started with the ``log-level=3`` argument.

    Returns:
        The ``webdriver.Chrome`` instance created with those options.
    """
    chrome_options = Options()
    # Passed straight through to Chrome; presumably reduces console log noise.
    chrome_options.add_argument('log-level=3')
    chrome_driver = webdriver.Chrome(options=chrome_options)
    return chrome_driver

In [None]:
# Template for a CBS Sports fantasy-basketball stats page. The three fields are
# the position filter, the season year and the time-window code.
CBS_STATS_URL = "https://www.cbssports.com/fantasy/basketball/stats/{position}/{season}/{timeframe}/stats/"

# Season year used in every URL; change it here to scrape a different season.
SEASON = 2020

# Position filters to scrape, in the order the pages are fetched.
POSITIONS = ("G", "F", "C", "PG", "SG", "PF", "SF")


def build_stat_urls(timeframe, season=SEASON, positions=POSITIONS):
    """Return one CBS stats URL per position for the given time window.

    Args:
        timeframe: Time-window URL segment, e.g. ``"ytd"``, ``"7d"`` or ``"14d"``.
        season: Season year placed in the URL.
        positions: Iterable of position codes; one URL is built for each.

    Returns:
        A list of URL strings, in the same order as ``positions``.
    """
    return [
        CBS_STATS_URL.format(position=position, season=season, timeframe=timeframe)
        for position in positions
    ]


season_long = build_stat_urls("ytd")
last_7 = build_stat_urls("7d")
last_14 = build_stat_urls("14d")

## User Inputs Below

In [None]:
# This is the only cell where users need to enter their own preferences.

# Choose which CBS stat window to scrape: the whole season (season_long),
# last_7, or last_14.
# NOTE(review): the last_7 / last_14 URLs use "7d" / "14d" segments, which look
# like the last 7 / 14 days rather than games — confirm against the CBS site.
url_list = season_long

# Enter the start date and time of the DraftKings slate whose salaries you want,
# formatted as 'YYYY-MM-DD HH:MM', e.g. '2020-12-23 00:30'.
# The value must be in UTC and is compared for exact equality (to the minute)
# with each contest's start time, so it has to match the slate start exactly.
# UTC is 5 hours ahead of EST (4 hours ahead of EDT), so a 7:30 PM EST slate on
# 2020-12-22 is entered as '2020-12-23 00:30'.
date = 'YYYY-MM-DD HH:MM'

In [None]:
# Starts up Selenium and collects data from CBS

# Parse the user-supplied slate time. The isinstance guard keeps this cell
# re-runnable: after the first run `date` is already a datetime, and calling
# strptime on it again would raise a TypeError.
if isinstance(date, str):
    date = datetime.datetime.strptime(date, '%Y-%m-%d %H:%M')

df_list = []
driver = setup_driver()
try:
    for link in url_list:
        logging.info(f"Getting {link} data.")
        driver.get(link)
        # Fixed pause so the page has time to render before its source is read.
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        table = soup.find("table", {"class": "TableBase-table"})
        if table is None:
            raise ValueError(f"No 'TableBase-table' stats table found at {link}")
        # Wrap the markup in StringIO: passing literal HTML strings to
        # read_html is deprecated in recent pandas releases.
        df = pd.read_html(io.StringIO(str(table)), header=0)[0]
        # Cells containing an em dash are replaced with 0.
        df = df.replace({'—': 0}, regex=True)
        df_list.append(df)
finally:
    # Always close the browser, even when a page fails to load or parse.
    driver.quit()

# ignore_index gives the combined frame a unique row index instead of
# repeating each page's 0..n labels.
all_players = pd.concat(df_list, ignore_index=True)

# Split the combined "Player" cell into its parts. The pattern is a raw string
# so that \D and \s reach the regex engine without invalid-escape warnings.
all_players[["position", "team", "Name", "position_2"]] = all_players.Player.str.extract(
    r"\D (SG|PG|SF|C|PF|F|G)\s+([A-Z]+)\s+(\D+) (SG|PG|SF|C|PF|F|G) ", expand=True)

# The same player can appear on several position pages; keep the first row only.
all_players = all_players.drop_duplicates(subset='Name')
all_players = all_players.drop(columns=['Player', 'position_2'])

In [None]:
# Read in draftkings data for selected slate and game_type

# One client is reused for every request instead of building a new one per call.
dk_client = Client()

# Collect the draft group of every NBA contest whose start time, truncated to
# the minute, equals the user-supplied `date`.
possible_groups = []
for game in dk_client.contests(sport=Sport.NBA).contests:
    starts_at = game.starts_at
    # Rebuild the start time from its components so that seconds (and any
    # timezone info) are dropped before comparing with the naive `date`.
    slate_time = datetime.datetime(
        starts_at.year, starts_at.month, starts_at.day, starts_at.hour, starts_at.minute)
    if slate_time == date:
        possible_groups.append(game.draft_group_id)

# Among the matching groups of the wanted contest type, keep the one that
# covers the most games.
key_length = 0
key_slate = 0
type_id = 70  # contest type to match — presumably the classic NBA format; TODO confirm
for group in set(possible_groups):
    # Fetch the details once per group; they are needed for both checks below.
    group_details = dk_client.draft_group_details(draft_group_id=group)
    if group_details.contest_details.type_id == type_id:
        group_length = len(group_details.games)
        if group_length > key_length:
            key_length = group_length
            key_slate = group

# Fail with a clear message instead of requesting draftGroupId=0 from DraftKings.
if not key_slate:
    raise ValueError(
        f"No DraftKings NBA draft group with contest type {type_id} starts at {date}; "
        "check that the date is in UTC and matches the slate start time exactly."
    )

data = pd.read_csv(f"https://www.draftkings.com/lineup/getavailableplayerscsv?contestTypeId={type_id}&draftGroupId={key_slate}")

In [None]:
# Clean and merge data

# Normalise player names in both sources so they can be joined: remove periods
# (e.g. in initials) and strip surrounding whitespace. regex=False makes '.' a
# literal period regardless of the pandas version's default; as a regex it
# would match every character.
data['Name'] = data['Name'].str.replace('.', '', regex=False).str.strip()
all_players['Name'] = all_players['Name'].str.replace('.', '', regex=False).str.strip()

# Default (inner) merge: only players present in both sources are kept.
merged_data = data.merge(all_players, on="Name")

In [None]:
# Clean data columns

# Positional rename of every merged column. The list length must equal the
# number of columns in merged_data, otherwise pandas raises a ValueError.
# NOTE(review): the order assumes DraftKings columns come first, followed by the
# CBS stat columns — verify if either source changes its layout.
merged_data.columns = [
    'position', 'Name + ID', 'name', 'ID', 'Roster Position', 'salary',
    'Game Info', 'team', 'fantasy_points_pg', 'games_played', 'games_started',
    'fpts  Fantasy Points', 'min  Minutes', 'minutes_pg', 'fgm', 'fga', 'fg%',
    'ftm', 'fta', 'ft%', '3pm', '3pa', '3fg%', 'pts', 'points_pg',
    'reb  Total Rebounds', 'rebound_pg', 'ast  Assists', 'assist_pg',
    'stl  Steals', 'steal_pg', 'to  Turnovers', 'turnover_pg', 'blk  Blocks',
    'block_pg', 'position_2', 'team_2'
]

# Cast the total-count columns to float so they can be divided below.
count_columns = ['games_played', 'fga', 'fgm', 'ftm', 'fta', '3pm', '3pa']
for count_column in count_columns:
    merged_data[count_column] = merged_data[count_column].astype(float)

# Per-game averages: new column name -> the total column it is derived from.
# Attempts come from '3pa' and makes from '3pm' (these two were previously swapped).
per_game_sources = {
    'field_goal_attempt_pg': 'fga',
    'field_goal_made_pg': 'fgm',
    'free_throw_made_pg': 'ftm',
    'free_throw_attempt_pg': 'fta',
    '3_point_attempt_pg': '3pa',
    '3_point_made_pg': '3pm',
}
for per_game_column, total_column in per_game_sources.items():
    merged_data[per_game_column] = (
        merged_data[total_column] / merged_data['games_played']
    ).round(1)

In [None]:
# Keep only the columns wanted in the final table, in display order.
final_columns = [
    'position', 'name', 'salary', 'team',
    'fantasy_points_pg', 'games_played', 'games_started',
    'minutes_pg', 'points_pg',
    'field_goal_attempt_pg', 'field_goal_made_pg', 'fg%',
    'free_throw_attempt_pg', 'free_throw_made_pg', 'ft%',
    '3_point_attempt_pg', '3_point_made_pg', '3fg%',
    'rebound_pg', 'assist_pg', 'steal_pg', 'turnover_pg', 'block_pg',
]
merged_data = merged_data[final_columns]

In [None]:
# Preview the first 10 rows of the final table.
merged_data.head(10)