In [1]:
import numpy as np
import pandas as pd
import csv
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import optuna
import time

In [2]:
def scrape(option: int, player_pos, player_name) -> pd.DataFrame:
    """Download a stats page and return one of the HTML tables found on it.

    Args:
        option: Position of the wanted table in the list that
            ``pd.read_html`` returns (0 = first table, 1 = second table).
        player_pos: Position code. ``'D'`` selects the FantasyPros team-defense
            page; any other value selects the player's NFL.com career page.
        player_name: Player slug inserted into the NFL.com URL
            (ignored when ``player_pos`` is ``'D'``).

    Returns:
        The table at index ``option`` on the fetched page.
    """
    # Defenses have no per-player page, so they are read from a single
    # season-level listing instead (a workaround noted by the original author).
    url = (
        'https://www.fantasypros.com/nfl/stats/dst.php'
        if player_pos == 'D'
        else r'https://www.nfl.com/players/' + player_name + '/stats/career'
    )

    tables = pd.read_html(url)
    return tables[option]

def helper(h_df: pd.DataFrame, h_df2: pd.DataFrame, h_fantasy_points: [float]):
    """Attach fantasy points to the first table and tidy both tables.

    All three arguments are modified in place:

    * the sum of ``h_fantasy_points`` is appended to that list, which is then
      stored on ``h_df`` as the ``'Fantasy Points'`` column;
    * the row labelled ``shape[0] - 1`` (the trailing totals row) is dropped
      from both tables;
    * the ``'YEAR'``, ``'TEAM'`` and ``'G'`` columns are dropped from both.

    Args:
        h_df: First stats table; receives the ``'Fantasy Points'`` column.
        h_df2: Second stats table.
        h_fantasy_points: One value per non-total row of ``h_df``.

    Returns:
        ``(h_df reversed, h_df2 reversed, h_fantasy_points)`` where the frames
        have their row order flipped and the list is the same (extended) object
        that was passed in.
    """
    # The extra entry lines the list up with the totals row still present in h_df.
    h_fantasy_points.append(sum(h_fantasy_points))
    h_df['Fantasy Points'] = h_fantasy_points

    # Remove the totals row from each table.
    for frame in (h_df, h_df2):
        frame.drop(index=frame.shape[0] - 1, inplace=True)

    # Remove the bookkeeping columns that are not used as features.
    for frame in (h_df, h_df2):
        for column in ('YEAR', 'TEAM', 'G'):
            frame.drop(columns=column, inplace=True)

    # Flip the row order of both tables before handing them back.
    return h_df.iloc[::-1], h_df2.iloc[::-1], h_fantasy_points

def objective(trial):
    """Optuna objective: fit an XGBoost regressor and score it by RMSE.

    Reads the module-level ``data_train``, ``label_train``, ``data_test`` and
    ``label_test`` that the driver cell sets for the player currently being
    tuned.

    Args:
        trial: Optuna trial used to sample one hyper-parameter combination.

    Returns:
        Root-mean-squared error of the fitted model on the held-out data.
    """
    hyperparams = {
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.28),
        'n_estimators': trial.suggest_int('n_estimators', 40, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
    }
    # These five knobs all share the same (0.01, 1.0) search interval.
    for name in ('gamma', 'subsample', 'colsample_bytree', 'reg_alpha', 'reg_lambda'):
        hyperparams[name] = trial.suggest_float(name, 0.01, 1.0)
    hyperparams['random_state'] = trial.suggest_int('random_state', 1, 1000)

    regressor = xgb.XGBRegressor(**hyperparams)
    regressor.fit(data_train, label_train)
    predictions = regressor.predict(data_test)

    squared_error = mean_squared_error(label_test, predictions)
    return np.sqrt(squared_error)

In [3]:
# Best hyper-parameter dict found for each quarterback that has enough seasons to tune on.
best_sets = []

# Fantasy scoring weights. An interception costs exactly one point, so it is
# subtracted directly below rather than given its own multiplier.
PASS_YARD_POINTS = 0.04
PASS_TD_POINTS = 4
RUSH_TD_POINTS = 6
RUSH_YARD_POINTS = 0.1

# Stat columns copied into the feature frame, prefixed 'p-' (passing table)
# and 'ru-' (rushing table) so the two tables' identically named columns do not clash.
PASSING_COLUMNS = ['COMP', 'PCT', 'YDS', 'AVG', 'LNG', 'TD', 'INT',
                   '1st', '1st%', '20+', 'SCK', 'SCKY', 'RATE']
RUSHING_COLUMNS = ['ATT', 'YDS', 'AVG', 'LNG', 'TD',
                   '1st', '1st%', '20+', '40+', 'FUM']

num_trials = 200  # Optuna trials per player
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Read the roster up front so the file handle is closed before the slow
# scraping/tuning starts. Blank lines are skipped (csv.reader yields [] for them).
with open('qbs.csv', mode='r', newline='') as file:
    roster = [row for row in csv.reader(file) if row]

for row in roster:
    fantasy_points = []
    player, pos = row[0], row[1]
    print("Optimizing: ", player)
    df = scrape(0, pos, player)   # first table: used for the passing stats
    df2 = scrape(1, pos, player)  # second table: used for the rushing stats

    # Fantasy points for every row except the last one (the totals row).
    for i in range(df.shape[0] - 1):
        points = df.at[i, 'YDS'] * PASS_YARD_POINTS   # passing yards
        points += (df.at[i, 'TD'] * PASS_TD_POINTS)   # passing touchdowns
        points -= df.at[i, 'INT']                     # interceptions
        points += (df2.at[i, 'TD'] * RUSH_TD_POINTS)  # rushing touchdowns
        points += df2.at[i, 'YDS'] * RUSH_YARD_POINTS  # rushing yards
        fantasy_points.append(points)

    df, df2, fantasy_points = helper(df, df2, fantasy_points)

    # Assemble the feature frame. Columns are assigned one at a time so that
    # the rushing columns are aligned to the passing table's index by label.
    temp = pd.DataFrame()
    for col in PASSING_COLUMNS:
        temp['p-' + col] = df[col]
    for col in RUSHING_COLUMNS:
        temp['ru-' + col] = df2[col]
    temp['Fantasy Points'] = df['Fantasy Points']

    # Target for a row is the fantasy total of the row that follows it; the
    # final row has no successor and gets 0 (that row is discarded just below).
    # shift() also copes with a table that held nothing but a totals row.
    temp['Target'] = df['Fantasy Points'].shift(-1, fill_value=0).tolist()

    # Flip the row order again, zero-fill gaps, and drop the leading row
    # (the one whose target is the 0 placeholder).
    df = temp[::-1].fillna(0).iloc[1:]

    # Need at least one row to train on and one to evaluate.
    if df.shape[0] > 1:
        data, label = df.iloc[:, :-1], df.iloc[:, -1]

        # First remaining row is held out for evaluation; the rest is training data.
        # These four names must stay module-level: `objective` reads them as globals.
        data_train, data_test = data.iloc[1:], data.iloc[:1]  # most recent season
        label_train, label_test = label.iloc[1:], label.iloc[:1]

        scaler = StandardScaler()
        data_train = scaler.fit_transform(data_train)
        data_test = scaler.transform(data_test)

        # Search for the hyper-parameters that minimise RMSE on the held-out row.
        study = optuna.create_study(direction='minimize', study_name='regression')
        study.optimize(objective, n_trials=num_trials, show_progress_bar=True, n_jobs=-1)

        result = study.best_trial.params
        best_sets.append(result)

Optimizing:  patrick-mahomes


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  josh-allen-4


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jalen-hurts


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  joe-burrow


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  geno-smith


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  justin-fields
Optimizing:  kirk-cousins


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  trevor-lawrence
Optimizing:  daniel-jones


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jared-goff


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  justin-herbert


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tom-brady


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  aaron-rodgers


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  lamar-jackson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tua-tagovailoa


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  russell-wilson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  derek-carr


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  dak-prescott


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  kyler-murray


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  marcus-mariota


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  davis-mills
Optimizing:  andy-dalton


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  mac-jones
Optimizing:  jacoby-brissett


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jimmy-garoppolo


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  matt-ryan


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  ryan-tannehill


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  kenny-pickett
Optimizing:  baker-mayfield


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  taylor-heinicke


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  carson-wentz


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  matthew-stafford


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  brock-purdy
Optimizing:  zach-wilson
Optimizing:  deshaun-watson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  sam-darnold


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  mitchell-trubisky


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  mike-white
Optimizing:  cooper-rush


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tyler-huntley


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  joe-flacco


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jameis-winston


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jarrett-stidham


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  phillip-walker


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  teddy-bridgewater


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  bailey-zappe
Optimizing:  gardner-minshew


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  desmond-ridder
Optimizing:  sam-ehlinger
Optimizing:  colt-mccoy


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  joshua-dobbs


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  malik-willis
Optimizing:  brett-rypien


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  davis-webb
Optimizing:  david-blough


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  skylar-thompson
Optimizing:  kyle-allen


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  sam-howell
Optimizing:  john-wolford


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  bryce-perkins
Optimizing:  jeff-driskel


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  trace-mcsorley


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  trey-lance
Optimizing:  nick-mullens


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tyrod-taylor


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jordan-love
Optimizing:  trevor-siemian


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  nathan-peterman


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  chris-streveler


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  chase-daniel


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  anthony-brown-3
Optimizing:  nick-foles


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  blaine-gabbert


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  brian-hoyer


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  brandon-allen


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  josh-johnson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  case-keenum


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jake-fromm
Optimizing:  jake-luton


In [4]:
# Display the best hyper-parameter dict recorded for each tuned player.
best_sets

[{'max_depth': 8,
  'learning_rate': 0.1384822878569238,
  'n_estimators': 376,
  'min_child_weight': 1,
  'gamma': 0.8805800801207553,
  'subsample': 0.9150285039197229,
  'colsample_bytree': 0.9428735561944548,
  'reg_alpha': 0.7280885841001846,
  'reg_lambda': 0.9948461630186739,
  'random_state': 163},
 {'max_depth': 8,
  'learning_rate': 0.06181520232866862,
  'n_estimators': 163,
  'min_child_weight': 1,
  'gamma': 0.9821338180093987,
  'subsample': 0.34209062244562455,
  'colsample_bytree': 0.6138874793485276,
  'reg_alpha': 0.29670630034418133,
  'reg_lambda': 0.15613494945620007,
  'random_state': 71},
 {'max_depth': 1,
  'learning_rate': 0.2478190388815374,
  'n_estimators': 102,
  'min_child_weight': 1,
  'gamma': 0.6296135445245093,
  'subsample': 0.8122995072747319,
  'colsample_bytree': 0.2721076277288244,
  'reg_alpha': 0.12993193723156654,
  'reg_lambda': 0.22638517580402295,
  'random_state': 98},
 {'max_depth': 1,
  'learning_rate': 0.1621888307840021,
  'n_estimators

In [5]:
# Average every tuned hyper-parameter across all per-player results in best_sets.
param_means = {}
for key in ('max_depth', 'learning_rate', 'n_estimators', 'min_child_weight',
            'gamma', 'subsample', 'colsample_bytree', 'reg_alpha',
            'reg_lambda', 'random_state'):
    values = [node[key] for node in best_sets]
    param_means[key] = sum(values) / len(values)

# Integer-valued parameters are truncated toward zero; the rest stay as float means.
max_depth_agg = int(param_means['max_depth'])
lr_agg = param_means['learning_rate']
n_est_agg = int(param_means['n_estimators'])
min_cw_agg = int(param_means['min_child_weight'])
gamma_agg = param_means['gamma']
subsam_agg = param_means['subsample']
col_bytree_agg = param_means['colsample_bytree']
alpha_agg = param_means['reg_alpha']
lambda_agg = param_means['reg_lambda']
random_agg = int(param_means['random_state'])

# Single consolidated parameter set, keyed the same way as each entry of best_sets.
best_set = dict(
    max_depth=max_depth_agg,
    learning_rate=lr_agg,
    n_estimators=n_est_agg,
    min_child_weight=min_cw_agg,
    gamma=gamma_agg,
    subsample=subsam_agg,
    colsample_bytree=col_bytree_agg,
    reg_alpha=alpha_agg,
    reg_lambda=lambda_agg,
    random_state=random_agg,
)

best_set

{'max_depth': 5,
 'learning_rate': 0.12737771893324404,
 'n_estimators': 432,
 'min_child_weight': 3,
 'gamma': 0.5108227694934251,
 'subsample': 0.5118441772075004,
 'colsample_bytree': 0.511912446346432,
 'reg_alpha': 0.4521761096756538,
 'reg_lambda': 0.5271042832557081,
 'random_state': 491}