In [7]:
import numpy as np
import pandas as pd
import csv
import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
import optuna
import time

def scrape(option: int, player_pos, player_name) -> pd.DataFrame:
    """Download a stats page and return one of the HTML tables found on it.

    Args:
        option: Index into the list of tables parsed from the page
            (0 = first table, 1 = second table).
        player_pos: Position code. 'D' selects the FantasyPros team-defense
            page; any other value selects the player's NFL.com career page.
        player_name: Player slug inserted into the NFL.com URL; not used
            when player_pos is 'D'.

    Returns:
        The table at position `option` among those parsed by pd.read_html.
    """
    if player_pos == 'D':
        # hacky way to get seasonal defensive stats
        return pd.read_html('https://www.fantasypros.com/nfl/stats/dst.php')[option]

    career_url = r'https://www.nfl.com/players/' + player_name + '/stats/career'
    tables = pd.read_html(career_url)
    return tables[option]

def helper(h_df: pd.DataFrame, h_df2: pd.DataFrame, h_fantasy_points: [float]):
    """Attach fantasy points to h_df and tidy both stat tables.

    All three arguments are modified in place: the sum of the points is
    appended to h_fantasy_points, and both frames lose their last row and
    their 'YEAR', 'TEAM' and 'G' columns.

    Args:
        h_df: Table that receives the 'Fantasy Points' column; it must have
            exactly one more row than h_fantasy_points has entries.
        h_df2: Second table, cleaned the same way as h_df.
        h_fantasy_points: Per-row fantasy points for every row of h_df
            except the last one.

    Returns:
        (h_df, h_df2, h_fantasy_points) with both frames in reversed row
        order and the list including the appended sum.
    """
    # The appended sum fills the slot of the final (total) row so the list
    # has one value per row of h_df.
    points_sum = sum(h_fantasy_points)
    h_fantasy_points.append(points_sum)
    h_df['Fantasy Points'] = h_fantasy_points

    frames = (h_df, h_df2)

    # get rid of total row and bad data
    for frame in frames:
        frame.drop(index=frame.shape[0] - 1, inplace=True)

    for frame in frames:
        for column in ('YEAR', 'TEAM', 'G'):
            frame.drop(columns=column, inplace=True)

    # reverse the rows
    return h_df[::-1], h_df2[::-1], h_fantasy_points

def objective(trial):
    """Optuna objective: fit an XGBRegressor with sampled settings, return its RMSE.

    The training and evaluation data are not passed in; data_train,
    label_train, data_test and label_test are read from module-level
    globals and must be assigned before a study calls this function.

    Args:
        trial: Optuna trial used to sample each hyper-parameter.

    Returns:
        Square root of the mean squared error between label_test and the
        model's predictions for data_test.
    """
    # (name, sampler, low, high) -- sampled top to bottom in this order.
    search_space = (
        ('max_depth', trial.suggest_int, 1, 10),
        ('learning_rate', trial.suggest_float, 0.001, 0.28),
        ('n_estimators', trial.suggest_int, 40, 1000),
        ('min_child_weight', trial.suggest_int, 1, 10),
        ('gamma', trial.suggest_float, 0.01, 1.0),
        ('subsample', trial.suggest_float, 0.01, 1.0),
        ('colsample_bytree', trial.suggest_float, 0.01, 1.0),
        ('reg_alpha', trial.suggest_float, 0.01, 1.0),
        ('reg_lambda', trial.suggest_float, 0.01, 1.0),
        ('random_state', trial.suggest_int, 1, 1000),
    )
    hyperparams = {name: suggest(name, low, high) for name, suggest, low, high in search_space}

    regressor = xgb.XGBRegressor(**hyperparams)
    regressor.fit(data_train, label_train)
    predictions = regressor.predict(data_test)

    mse = mean_squared_error(label_test, predictions)
    return np.sqrt(mse)

In [8]:
best_sets = []

# Number of Optuna trials run for every player that has enough rows to train on.
num_trials = 200

# Keep Optuna's per-trial log lines out of the cell output; setting it once is enough.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Stat columns copied into the feature frame: rushing columns get an 'ru-'
# prefix, receiving columns an 're-' prefix.
rushing_columns = ['ATT', 'YDS', 'AVG', 'LNG', 'TD', '1st', '1st%', '20+', '40+', 'FUM']
receiving_columns = ['REC', 'YDS', 'AVG', 'LNG', 'TD', '1st', '1st%', '20+', '40+']

# Scoring applied to both tables: 6 points per TD, 0.1 point per yard.
scoring = (('TD', 6), ('YDS', 0.1))

# newline='' is the mode the csv module documents for files given to csv.reader.
with open('wrs.csv', mode='r', newline='') as file:
    for row in csv.reader(file):
        if len(row) < 2:
            continue  # blank or incomplete line: no player slug / position to read

        player, pos = row[0], row[1]
        print("Optimizing: ", player)

        # for wr
        df = scrape(1, pos, player)  # get rushing
        df2 = scrape(0, pos, player)  # get receiving

        # One fantasy-point value per row except the last one; helper() appends
        # the sum so the list matches every row of df, including that last row.
        fantasy_points = []
        for i in range(df.shape[0] - 1):
            points = 0
            for table in (df, df2):
                for column, weight in scoring:
                    value = table.at[i, column]
                    # A missing stat contributes nothing. Without this, one NaN
                    # makes the whole row's total NaN, and the later fillna(0)
                    # would turn that row's points (and target) into 0.
                    if pd.isna(value):
                        value = 0
                    points += value * weight
            fantasy_points.append(points)

        df, df2, fantasy_points = helper(df, df2, fantasy_points)

        # Feature frame for this player; 'Target' is added last so it can be
        # split off as the label column.
        temp = pd.DataFrame()

        if df.isnull().sum().sum() >= (df.shape[0] / 2):
            # Null count in df is at least half its row count: leave the
            # rushing columns out of the features.
            print("Pass")
        else:
            for column in rushing_columns:
                temp['ru-' + column] = df[column]

        # Each receiving stat gets its own column ('1st' and '1st%' are distinct).
        for column in receiving_columns:
            temp['re-' + column] = df2[column]

        temp['Fantasy Points'] = df['Fantasy Points']

        # Each row's target is the next row's fantasy points (presumably the
        # following season -- TODO confirm the source table is newest-first).
        # The final slot is padded with 0; that row is dropped just below.
        target = df['Fantasy Points'].tolist()
        target.pop(0)
        target.append(0)
        temp['Target'] = target

        df = temp[::-1]  # reverse the rows
        df = df.fillna(0)
        df = df.iloc[1:]  # drop the row whose Target is only the 0 placeholder

        if df.shape[0] > 1:
            data, label = df.iloc[:, :-1], df.iloc[:, -1]

            # objective() reads these four names as module-level globals.
            data_train, data_test = data.iloc[1:], data.iloc[:1]  # most recent season
            label_train, label_test = label.iloc[1:], label.iloc[:1]

            # Scaling statistics come from the training rows only.
            scaler = StandardScaler()
            data_train = scaler.fit_transform(data_train)
            data_test = scaler.transform(data_test)

            # Fit with default hyper-parameters. Nothing in this cell reads
            # model or preds afterwards; kept in case another cell does.
            model = xgb.XGBRegressor()
            model.fit(data_train, label_train)
            preds = model.predict(data_test)

            # optimize parameters
            study = optuna.create_study(direction='minimize', study_name='regression')
            study.optimize(objective, n_trials=num_trials, show_progress_bar=True, n_jobs=-1)

            result = study.best_trial.params
            best_sets.append(result)

Optimizing:  justin-jefferson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  davante-adams
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tyreek-hill


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  a-j-brown
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  stefon-diggs
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  ceedee-lamb


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jaylen-waddle
Optimizing:  amari-cooper
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  amon-ra-st-brown
Optimizing:  devonta-smith
Pass
Optimizing:  christian-kirk


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  ja-marr-chase
Optimizing:  tyler-lockett
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  terry-mclaurin
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  brandon-aiyuk


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  mike-evans
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tee-higgins
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jerry-jeudy
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  d-k-metcalf
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  d-j-moore


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  garrett-wilson
Optimizing:  cooper-kupp
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  gabe-davis
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  christian-watson
Optimizing:  chris-godwin
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  michael-pittman-2


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tyler-boyd
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  zay-jones
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  allen-lazard
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  george-pickens
Optimizing:  mike-williams
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jakobi-meyers
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  deebo-samuel


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  curtis-samuel


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  adam-thielen
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  juju-smith-schuster
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  donovan-peoples-jones
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  keenan-allen
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  josh-palmer
Optimizing:  mack-hollins
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  k-j-osborn
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jahan-dotson
Optimizing:  courtland-sutton
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  diontae-johnson


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  marquise-brown
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  brandin-cooks
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  deandre-hopkins
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  parris-campbell
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  darius-slayton
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  marquez-valdes-scantling
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  devin-duvernay


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  isaiah-mckenzie
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  richie-james
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  russell-gage
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  rashid-shaheed
Optimizing:  noah-brown
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  marvin-jones
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  olamide-zaccheaus
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  kalif-raymond
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  mecole-hardman


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  d-j-chark
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  chris-moore
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  deandre-carter
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  josh-reynolds
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  corey-davis
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  marquise-goodwin
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  robert-woods
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  darnell-mooney


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  greg-dortch
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  romeo-doubs
Optimizing:  chase-claypool


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  demarcus-robinson
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  treylon-burks
Optimizing:  van-jefferson
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  trent-sherfield
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  kendrick-bourne
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  allen-robinson
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  quez-watkins
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  elijah-moore
Optimizing:  jamal-agnew


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  trenton-irwin
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  randall-cobb
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  rondale-moore
Optimizing:  julio-jones
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  dante-pettis
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  ben-skowronek
Pass
Optimizing:  tutu-atwell
Pass
Optimizing:  tyquan-thornton
Optimizing:  nelson-agholor
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  justin-watson
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  hunter-renfrow
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  kadarius-toney


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  braxton-berrios
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  shi-smith
Pass
Optimizing:  damiere-byrd
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  a-j-green
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  jarvis-landry
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  michael-thomas
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  tre-quan-smith
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  kendall-hinton
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  robbie-anderson
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  phillip-dorsett
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  sammy-watkins
Pass


  self._init_valid()


  0%|          | 0/200 [00:00<?, ?it/s]

Optimizing:  wan-dale-robinson


In [9]:
# Display the list of best hyper-parameter dictionaries, one per optimized player.
best_sets

[{'max_depth': 10,
  'learning_rate': 0.16587068830863264,
  'n_estimators': 226,
  'min_child_weight': 1,
  'gamma': 0.3146387028699212,
  'subsample': 0.32372774612408295,
  'colsample_bytree': 0.44213089791859006,
  'reg_alpha': 0.20554556324520204,
  'reg_lambda': 0.32589644248761473,
  'random_state': 582},
 {'max_depth': 4,
  'learning_rate': 0.11322068217860831,
  'n_estimators': 241,
  'min_child_weight': 7,
  'gamma': 0.3161590328671536,
  'subsample': 0.8170993965371112,
  'colsample_bytree': 0.046405139171371186,
  'reg_alpha': 0.10389671431278173,
  'reg_lambda': 0.5274914909284939,
  'random_state': 980},
 {'max_depth': 10,
  'learning_rate': 0.27835683534307926,
  'n_estimators': 633,
  'min_child_weight': 2,
  'gamma': 0.9746811218263312,
  'subsample': 0.7657647415850631,
  'colsample_bytree': 0.01569900238017724,
  'reg_alpha': 0.13516654838477699,
  'reg_lambda': 0.37582528844797125,
  'random_state': 728},
 {'max_depth': 10,
  'learning_rate': 0.017134871574546864,
 

In [10]:
# Average the per-player best hyper-parameters into one combined set.

# One list per hyper-parameter, filled from every entry of best_sets.
max_depth_agg, lr_agg, n_est_agg, min_cw_agg, gamma_agg = [], [], [], [], []
subsam_agg, col_bytree_agg, alpha_agg, lambda_agg, random_agg = [], [], [], [], []

# Maps each hyper-parameter key to the list that collects its values.
collectors = {
    'max_depth': max_depth_agg,
    'learning_rate': lr_agg,
    'n_estimators': n_est_agg,
    'min_child_weight': min_cw_agg,
    'gamma': gamma_agg,
    'subsample': subsam_agg,
    'colsample_bytree': col_bytree_agg,
    'reg_alpha': alpha_agg,
    'reg_lambda': lambda_agg,
    'random_state': random_agg,
}

for node in best_sets:
    for key, bucket in collectors.items():
        bucket.append(node[key])


def _mean(values):
    """Arithmetic mean of a non-empty list of numbers."""
    return sum(values) / len(values)


# Integer-valued hyper-parameters are truncated back to int after averaging;
# the others keep their float mean.
max_depth_agg = int(_mean(max_depth_agg))
lr_agg = _mean(lr_agg)
n_est_agg = int(_mean(n_est_agg))
min_cw_agg = int(_mean(min_cw_agg))
gamma_agg = _mean(gamma_agg)
subsam_agg = _mean(subsam_agg)
col_bytree_agg = _mean(col_bytree_agg)
alpha_agg = _mean(alpha_agg)
lambda_agg = _mean(lambda_agg)
random_agg = int(_mean(random_agg))

best_set = dict(
    max_depth=max_depth_agg,
    learning_rate=lr_agg,
    n_estimators=n_est_agg,
    min_child_weight=min_cw_agg,
    gamma=gamma_agg,
    subsample=subsam_agg,
    colsample_bytree=col_bytree_agg,
    reg_alpha=alpha_agg,
    reg_lambda=lambda_agg,
    random_state=random_agg,
)

best_set

{'max_depth': 5,
 'learning_rate': 0.14059527657696863,
 'n_estimators': 361,
 'min_child_weight': 3,
 'gamma': 0.4912187267826963,
 'subsample': 0.5045950075765605,
 'colsample_bytree': 0.524596396920619,
 'reg_alpha': 0.4413223538576955,
 'reg_lambda': 0.48185366891338777,
 'random_state': 505}