In [3]:
import numpy as np
import pandas as pd
import csv
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import optuna
import time

def scrape(option: int, player_pos, player_name) -> pd.DataFrame:
    url = None

    if player_pos == 'D':
        # hacky way to get seasonal defensive stats
        url = 'https://www.fantasypros.com/nfl/stats/dst.php'
    else:
        url_head = r'https://www.nfl.com/players/'
        url_feet = '/stats/career'
        url = url_head + player_name + url_feet

    s_df = pd.read_html(url)
    return s_df[option]  # option of 0 = first table, 1 = second table

def objective(trial):
    param = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.28),
        'n_estimators': trial.suggest_int('n_estimators', 40, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0.01, 1.0),
        'subsample': trial.suggest_float('subsample', 0.01, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.01, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 1.0),
        'random_state': trial.suggest_int('random_state', 1, 1000)  # ,'seed': trial.suggest_int('seed', 0, 10000)
    }
    m = xgb.XGBRegressor(**param)
    m.fit(data_train, label_train)
    y_pred = m.predict(data_test)
    return np.sqrt(mean_squared_error(label_test, y_pred))

In [4]:
best_sets = []

with open('tes.csv', mode ='r') as file:
    f = csv.reader(file)

    for row in f:
        fantasy_points = []
        player, pos = row[0], row[1]
        print("Optimizing: ", player)
        df = scrape(0, pos, player)  # get receiving

        for i in range(df.shape[0] - 1):
            # for te
            points = 0

            # receiving TD
            rush_td = df.at[i, 'TD']
            points += (rush_td * 6)

            # receiving YDs
            rush_yds = df.at[i, 'YDS'] * 0.1
            points += rush_yds

            fantasy_points.append(points)

        fantasy_points.append(sum(fantasy_points))  # this line calculates the total sum of all fantasy points on table
        df['Fantasy Points'] = fantasy_points

        df.drop('YEAR', axis=1, inplace=True)
        df.drop('TEAM', axis=1, inplace=True)
        df.drop('G', axis=1, inplace=True)

        df.drop(df.shape[0] - 1, axis=0, inplace=True)

        df = df[::-1]

        temp = pd.DataFrame()
        temp['re-REC'] = df['REC']
        temp['re-YDS'] = df['YDS']
        temp['re-AVG'] = df['AVG']
        temp['re-LNG'] = df['LNG']
        temp['re-TD'] = df['TD']
        temp['re-1st'] = df['1st']
        temp['re-1st'] = df['1st%']
        temp['re-20+'] = df['20+']
        temp['re-40+'] = df['40+']
        temp['Fantasy Points'] = df['Fantasy Points']
        target = df['Fantasy Points'].tolist()
        target.pop(0)
        target.append(0)
        temp['Target'] = target
        df = temp
        df = df[::-1]  # reverse the rows

        df = df.fillna(0)
        df = df.iloc[1:]

        #print(df.shape[0])

        if df.shape[0] > 1:
            data, label = df.iloc[:, :-1], df.iloc[:, -1]
            xgb.DMatrix(data=data, label=label, enable_categorical=True)

            data_train, data_test = data.iloc[1:], data.iloc[:1]  # most recent season
            label_train, label_test = label.iloc[1:], label.iloc[:1]

            scaler = StandardScaler()
            #print(data_train)
            data_train = scaler.fit_transform(data_train)
            data_test = scaler.transform(data_test)

            model = xgb.XGBRegressor()
            model.fit(data_train, label_train)
            preds = model.predict(data_test)

            # optimize parameters
            num_trials = 200

            start = time.time()
            optuna.logging.set_verbosity(optuna.logging.WARNING)
            study = optuna.create_study(direction='minimize', study_name='regression') # when first run
            #study = joblib.load('qb.pkl')
            study.optimize(objective, n_trials=num_trials, show_progress_bar=True, n_jobs=-1)

            #joblib.dump(study, "test.pkl")  # save study

            # result = study.best_params
            result = study.best_trial.params
            #print("Time: ", time.time() - start)
            #print(result)
            best_sets.append(result)

Optimizing:  travis-kelce


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  george-kittle


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  taysom-hill


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  t-j-hockenson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  mark-andrews


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  evan-engram


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  cole-kmet


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  juwan-johnson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  dawson-knox


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  dallas-goedert


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  dalton-schultz


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  pat-freiermuth
Optimizing:  david-njoku


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  gerald-everett


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tyler-higbee


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jordan-akins


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tyler-conklin


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  noah-fant


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  zach-ertz


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  mike-gesicki


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  chigoziem-okonkwo
Optimizing:  hunter-henry


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  robert-tonyan


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  darren-waller


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  austin-hooper


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  hayden-hurst


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  isaiah-likely
Optimizing:  greg-dulcich
Optimizing:  will-dissly


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  foster-moreau


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  cade-otton
Optimizing:  jelani-woods
Optimizing:  kyle-pitts
Optimizing:  brock-wright
Optimizing:  daniel-bellinger
Optimizing:  colby-parkinson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  noah-gray
Optimizing:  mycole-pruitt


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  logan-thomas


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  c-j-uzomah


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  mo-alie-cox


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tommy-tremble
Optimizing:  trey-mcbride
Optimizing:  harrison-bryant


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  irv-smith


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  kylen-granson
Optimizing:  shane-zylstra
Optimizing:  jake-ferguson
Optimizing:  peyton-hendershot
Optimizing:  josh-oliver


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  adam-trautman


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  o-j-howard


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jonnu-smith


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  durham-smythe


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  connor-heyward
Optimizing:  teagan-quitoriano
Optimizing:  jody-fortson
Optimizing:  eric-saubert


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  marcedes-lewis


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  johnny-mundt


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  mitchell-wilcox
Optimizing:  eric-tomlinson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  ian-thomas


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  donald-parham


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  cameron-brate


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  james-mitchell
Optimizing:  john-bates
Optimizing:  ross-dwelley


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  albert-okwuegbunam


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  ko-kieft
Optimizing:  dan-arnold


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tanner-hudson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  zach-gentry


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  brevin-jordan
Optimizing:  chris-myarick


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  quintin-morris
Optimizing:  jack-stoll
Optimizing:  geoff-swaim


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  pharaoh-brown


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  josiah-deguara


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  brycen-hopkins


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  anthony-firkser


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  giovanni-ricci
Optimizing:  armani-rogers
Optimizing:  parker-hesse
Optimizing:  kyle-rudolph


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  grant-calcaterra
Optimizing:  blake-bell


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tre-mckitty
Optimizing:  andrew-beck


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tyler-mabry
Optimizing:  tyler-kroft


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  nick-vannett


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  charlie-kolar
Optimizing:  trevon-wesco


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  stephen-sullivan


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  chris-manhertz


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  luke-farrell
Optimizing:  kendall-blanton


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  ryan-francis-griffin


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tyler-davis-3


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  ben-ellefson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  cole-turner
Optimizing:  jesper-horsted


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  maxx-williams


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  stone-smartt
Optimizing:  sean-mckeon


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  stephen-anderson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jeremy-ruckert
Optimizing:  tommy-sweeney


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jacob-harris
Optimizing:  mason-schreck


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  devin-asiasi


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  richard-rodgers


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

In [5]:
best_sets

[{'max_depth': 7,
  'learning_rate': 0.09309995995529681,
  'n_estimators': 265,
  'min_child_weight': 1,
  'gamma': 0.6699921882993306,
  'subsample': 0.8812984683779539,
  'colsample_bytree': 0.9857941864703951,
  'reg_alpha': 0.12324322507489847,
  'reg_lambda': 0.3742545331306651,
  'random_state': 712},
 {'max_depth': 3,
  'learning_rate': 0.23814684845990025,
  'n_estimators': 381,
  'min_child_weight': 3,
  'gamma': 0.3373545947852323,
  'subsample': 0.7541984047288197,
  'colsample_bytree': 0.6667538856794842,
  'reg_alpha': 0.08854041651617969,
  'reg_lambda': 0.018031214478010982,
  'random_state': 936},
 {'max_depth': 4,
  'learning_rate': 0.09310649134491553,
  'n_estimators': 662,
  'min_child_weight': 4,
  'gamma': 0.69142377833821,
  'subsample': 0.5488027323683404,
  'colsample_bytree': 0.37994222672449685,
  'reg_alpha': 0.9332921694472365,
  'reg_lambda': 0.785819147784661,
  'random_state': 967},
 {'max_depth': 8,
  'learning_rate': 0.22233184815134413,
  'n_estimato

In [6]:
max_depth_agg = []
lr_agg = []
n_est_agg = []
min_cw_agg = []
gamma_agg = []
subsam_agg = []
col_bytree_agg = []
alpha_agg = []
lambda_agg = []
random_agg = []

for node in best_sets:
    max_depth_agg.append(node['max_depth'])
    lr_agg.append(node['learning_rate'])
    n_est_agg.append(node['n_estimators'])
    min_cw_agg.append(node['min_child_weight'])
    gamma_agg.append(node['gamma'])
    subsam_agg.append(node['subsample'])
    col_bytree_agg.append(node['colsample_bytree'])
    alpha_agg.append(node['reg_alpha'])
    lambda_agg.append(node['reg_lambda'])
    random_agg.append(node['random_state'])

max_depth_agg = int(sum(max_depth_agg)/len(max_depth_agg))
lr_agg = sum(lr_agg)/len(lr_agg)
n_est_agg = int(sum(n_est_agg)/len(n_est_agg))
min_cw_agg = int(sum(min_cw_agg)/len(min_cw_agg))
gamma_agg = sum(gamma_agg)/len(gamma_agg)
subsam_agg = sum(subsam_agg)/len(subsam_agg)
col_bytree_agg = sum(col_bytree_agg)/len(col_bytree_agg)
alpha_agg = sum(alpha_agg)/len(alpha_agg)
lambda_agg = sum(lambda_agg)/len(lambda_agg)
random_agg = int(sum(random_agg)/len(random_agg))

best_set = {
    'max_depth': max_depth_agg,
    'learning_rate': lr_agg,
    'n_estimators': n_est_agg,
    'min_child_weight': min_cw_agg,
    'gamma': gamma_agg,
    'subsample': subsam_agg,
    'colsample_bytree': col_bytree_agg,
    'reg_alpha': alpha_agg,
    'reg_lambda': lambda_agg,
    'random_state': random_agg
}

best_set

{'max_depth': 5,
 'learning_rate': 0.14644306581236965,
 'n_estimators': 448,
 'min_child_weight': 2,
 'gamma': 0.5502081372840419,
 'subsample': 0.5248104261205337,
 'colsample_bytree': 0.5322044018262213,
 'reg_alpha': 0.43641087265405804,
 'reg_lambda': 0.48524812228197267,
 'random_state': 503}