# 2019 Fantasy Football Projections based on 2015-18 statistics.

### Importing Packages

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from xgboost import XGBRegressor
from sklearn.feature_selection import RFE
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import make_scorer

In [2]:
import warnings
warnings.filterwarnings("ignore")

### Loading Data

In [3]:
relevant_columns_stats = ["Season", "playerID", "game.id", "Team", "name", "pass.att", "pass.comp",
                    "passyds", "pass.tds", "pass.ints", "rush.att", "rushyds",
                    "rushtds", "recyds", "rec.tds", "recept", "kickret.avg",
                   "kickret.tds", "kick.rets", "punt.rets", "puntret.avg", 
                   "puntret.tds", "fgm", "fga", "fgyds", "xpmade", "xpmissed",
                   "sacks", 'defints', "forced.fumbs", "totalfumbs", "recfumbs", "pass.twoptm",
                   "fumbslost", "rec.twoptm", "rush.twoptm"]


df_train = pd.read_csv("2009stats.csv", usecols = relevant_columns_stats)
df_train = df_train.append(pd.read_csv("2010stats.csv", usecols = relevant_columns_stats))
df_train = df_train.append(pd.read_csv("2011stats.csv", usecols = relevant_columns_stats))
df_train = df_train.append(pd.read_csv("2012stats.csv", usecols = relevant_columns_stats))
df_train = df_train.append(pd.read_csv("2013stats.csv", usecols = relevant_columns_stats))
df_train = df_train.append(pd.read_csv("2014stats.csv", usecols = relevant_columns_stats))
df_train = df_train.append(pd.read_csv("2015stats.csv", usecols = relevant_columns_stats))
df_train = df_train.append(pd.read_csv("2016stats.csv", usecols = relevant_columns_stats))
df_train = df_train.append(pd.read_csv("2017stats.csv", usecols = relevant_columns_stats))
df_train = df_train.append(pd.read_csv("2018stats.csv", usecols = relevant_columns_stats))
df_train.Team = df_train.Team.replace("STL","LA")
df_train.Team = df_train.Team.replace("SD","LAC")
df_train.Team = df_train.Team.replace("JAC","JAX")


df_train.dropna(inplace = True)

relevant_columns_def = ["Tm","PR TD","KR TD","FblTD","IntTD","OthTD","Sfty"]

relevant_columns_def = ["Tm","PR TD","KR TD","FblTD","IntTD","OthTD","Sfty"]

df_def = pd.read_csv("2009def.csv", usecols = relevant_columns_def)
df_def = df_def.append(pd.read_csv("2010def.csv", usecols = relevant_columns_def))
df_def = df_def.append(pd.read_csv("2011def.csv", usecols = relevant_columns_def))
df_def = df_def.append(pd.read_csv("2012def.csv", usecols = relevant_columns_def))
df_def = df_def.append(pd.read_csv("2013def.csv", usecols = relevant_columns_def))
df_def = df_def.append(pd.read_csv("2014def.csv", usecols = relevant_columns_def))
df_def = df_def.append(pd.read_csv("2015def.csv", usecols = relevant_columns_def))
df_def = df_def.append(pd.read_csv("2016def.csv", usecols = relevant_columns_def))
df_def = df_def.append(pd.read_csv("2017def.csv", usecols = relevant_columns_def))
df_def = df_def.append(pd.read_csv("2018def.csv", usecols = relevant_columns_def))

### Adding Attributes for Opposition and Making Way to Merge Data Sets

In [4]:
df_def["sum_vals"] = 1

df_abbreviations = pd.read_csv("abbreviations.csv")

df_def = df_def.merge(df_abbreviations, on = ["Tm"])


df_def["Season"] = df_def.groupby("Team")["sum_vals"].transform(lambda x: x.cumsum() + 2008)

df_def.fillna(0, inplace = True)

df_def.drop(["Tm", "sum_vals"], inplace = True, axis = 1)

df_train['opp'] = df_train.groupby("game.id").Team.transform(lambda x: x.unique()[0])
df_train['opp2'] = df_train.groupby("game.id").Team.transform(lambda x: x.unique()[1])

df_train.loc[df_train['opp'] == df_train['Team'], 'opp'] = df_train.loc[df_train['opp'] == df_train['Team'], 'opp2']
df_train.drop(['opp2'], axis = 1, inplace = True)

### Separating Data into Offense and Defense

In [5]:
def_columns = ["Team","rec.twoptm",
              "passyds", "pass.tds", "rushyds",
              "rushtds", "kickret.tds", "puntret.tds", "fgm",
              "xpmade", "fumbslost", "pass.ints",
              "rush.twoptm"]

df_defense = df_train

df_defense["points_allowed"] = ((df_defense["pass.tds"] + df_defense["rushtds"] + df_defense["kickret.tds"]
                                 + df_defense["puntret.tds"]) * 6 + df_defense["fgm"] * 3 +
                                df_defense["rec.twoptm"] * 2 + df_defense["rush.twoptm"] * 2
                                + df_defense["xpmade"])

df_defense["yards"] = df_defense["passyds"] + df_defense["rushyds"]

df_defense = df_defense.groupby(["game.id","opp", "Season",])["yards", "points_allowed"].sum().reset_index()

In [6]:
df_defense["0pts"] = df_defense.points_allowed == 0
df_defense["1_6pts"] = df_defense.points_allowed.between(1, 6)
df_defense["7_13pts"] = df_defense.points_allowed.between(7, 13)
df_defense["14_17pts"] = df_defense.points_allowed.between(14, 17)
df_defense["18_27pts"] = df_defense.points_allowed.between(18, 27)
df_defense["28_34pts"] = df_defense.points_allowed.between(28, 34)
df_defense["35_45pts"] = df_defense.points_allowed.between(35, 45)
df_defense["45pts"] = df_defense.points_allowed > 45

df_defense["YA100"] = df_defense.yards < 100
df_defense["YA199"] = df_defense.yards.between(100, 199)
df_defense["YA299"] = df_defense.yards.between(200, 299)
df_defense["YA349"] = df_defense.yards.between(300, 349)
df_defense["YA399"] = df_defense.yards.between(350, 399)
df_defense["YA449"] = df_defense.yards.between(400, 449)
df_defense["YA499"] = df_defense.yards.between(450, 499)
df_defense["YA549"] = df_defense.yards.between(500, 549)
df_defense["YA550"] = df_defense.yards > 549

In [7]:
def_columns = ["0pts", "1_6pts", "7_13pts", "14_17pts",
               "28_34pts","35_45pts","45pts",
               "YA100","YA199","YA299","YA399",
               "YA449","YA499","YA549","YA550", "YA349", "18_27pts"]


df_defense = df_defense.groupby(["Season", "opp"])[def_columns].sum()



df_defense["Sacks"] = df_train.groupby(["Season", "Team"])["sacks"].sum()

df_train["exp"] = df_train.groupby("playerID").Season.transform(lambda x: x - min(x))

df_defense["exp"] = df_train.groupby(["Season", "Team"])["exp"].mean()

df_train.drop("exp", inplace = True, axis = 1)

df_defense["pass.ints"] = df_train.groupby(["Season", "opp"])["pass.ints"].sum()

df_defense["fumbslost"] = df_train.groupby(["Season", "opp"])["fumbslost"].sum()


df_defense = df_defense.merge(df_def, left_on = ["opp", "Season"], right_on = ["Team", "Season"])

df_defense["TD"] = (df_defense["PR TD"] + df_defense["KR TD"] + df_defense["FblTD"] + df_defense["IntTD"] +
+df_defense["OthTD"])

df_defense["turnovers"] = df_defense["fumbslost"] + df_defense["pass.ints"]

df_defense.drop(["fumbslost", "pass.ints", 
            "PR TD","KR TD","FblTD","IntTD","OthTD"], axis = 1, inplace = True)

off_columns = ["Season", "playerID", "game.id", "Team", "name", "pass.att", "pass.comp",
                    "passyds", "pass.tds", "pass.ints", "rush.att", "rushyds",
                    "rushtds", "recyds", "rec.tds", "recept", "kickret.avg",
                   "kickret.tds", "kick.rets", "punt.rets", "puntret.avg", 
                   "puntret.tds", "fumbslost", "rec.twoptm", "rush.twoptm", "pass.twoptm",
              "fgm", "fga", "fgyds", "xpmade", "xpmissed", "totalfumbs", "opp"]

off_columns_numeric = ["pass.att", "pass.comp", "totalfumbs", "fumbslost",
                    "passyds", "pass.tds", "pass.ints", "rush.att", "rushyds",
                    "rushtds", "recyds", "rec.tds", "recept", "kickret.avg",
                   "kickret.tds", "kick.rets", "punt.rets", "puntret.avg", 
                   "puntret.tds", "rec.twoptm", "rush.twoptm", "pass.twoptm",
                      "fgm", "fga", "fgyds", "xpmade", "xpmissed"]

df_offense = df_train[off_columns]

df_offense = df_offense.loc[(df_offense[off_columns_numeric] > 0).any(axis = 1)]

df_offense.Team = df_offense.groupby(["Season", "playerID"]).Team.transform(lambda x: x.iloc[len(x) - 1])

df_offense = pd.merge(df_offense.groupby(["Season", "playerID", "Team"])[off_columns_numeric].sum().reset_index(),
                      df_offense[["playerID", "name"]].drop_duplicates("playerID", keep = "last"), on = "playerID")

### Adding Position

In [8]:
relevant_columns_pos = ["GSIS_ID", "Season", "Pos"]

df_pos = pd.read_csv("2009Pos.csv", usecols = relevant_columns_pos)
df_pos = df_pos.append(pd.read_csv("2010Pos.csv", usecols = relevant_columns_pos))
df_pos = df_pos.append(pd.read_csv("2011Pos.csv", usecols = relevant_columns_pos))
df_pos = df_pos.append(pd.read_csv("2012Pos.csv", usecols = relevant_columns_pos))
df_pos = df_pos.append(pd.read_csv("2013Pos.csv", usecols = relevant_columns_pos))
df_pos = df_pos.append(pd.read_csv("2014Pos.csv", usecols = relevant_columns_pos))
df_pos = df_pos.append(pd.read_csv("2015Pos.csv", usecols = relevant_columns_pos))
df_pos = df_pos.append(pd.read_csv("2016Pos.csv", usecols = relevant_columns_pos))
df_pos = df_pos.append(pd.read_csv("2017Pos.csv", usecols = relevant_columns_pos))
df_pos = df_pos.append(pd.read_csv("2018Pos.csv", usecols = relevant_columns_pos))

df_pos.Pos = df_pos.Pos.replace("FB","RB")

df_offense = pd.merge(df_offense, df_pos, left_on = ["playerID", "Season"], 
                      right_on = ["GSIS_ID", "Season"], how = "left").drop("GSIS_ID", axis = 1)

df_offense.Pos = df_offense.groupby("playerID").Pos.fillna(method = 'ffill')
df_offense.Pos = df_offense.groupby("playerID").Pos.fillna(method = 'bfill')

### Adding data for previous data

In [9]:
relevant_prev_columns = ["pass.att", "pass.comp", "totalfumbs",
                    "passyds", "pass.tds", "pass.ints", "rush.att", "rushyds",
                    "rushtds", "recyds", "rec.tds", "recept"]


relevant_columns_spotrac = ['PLAYER', 'POS', 'TEAM']

df_spotrac = pd.read_csv("Spotrac_NFLActivePlayerContracts.csv", encoding = "ISO-8859-1",
                        usecols = relevant_columns_spotrac)

df_spotrac = df_spotrac.loc[df_spotrac.POS != 'OLB']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'DT']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'DE']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'LT']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'ILB']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'G']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'FS']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'CB']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'RT']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'SS']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'C']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'P']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'LB']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'T']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'S']
df_spotrac = df_spotrac.loc[df_spotrac.POS != 'LS']

df_spotrac.PLAYER = df_spotrac.PLAYER.transform(lambda x: x.str.split('\xa0').str.get(0))
df_spotrac.PLAYER = df_spotrac.PLAYER.transform(lambda x: x.str.split(' ').str.get(0).str[0] + '.' + 
                                         x.str.split(' ').str.get(-1))

df_spotrac.TEAM = df_spotrac.TEAM.transform(lambda x: x.str.split('Signed').str.get(0))
df_spotrac.rename(columns={"PLAYER": "name", "TEAM": "next_Team", "POS": "Pos"}, inplace = True)

df_spotrac.loc[df_spotrac.name == "O.Jr.", "name"] = "O.Beckham"
df_spotrac.loc[df_spotrac.name == "T.Jr.", "name"] = "T.Ginn"

df_spotrac["name_dict"] = df_spotrac["name"] + "_" + df_spotrac["Pos"]

df_offense.sort_values(["Season", "Team"], inplace = True)
df_offense.reset_index(inplace = True, drop = True)

df_offense["name_dict"] = df_offense["name"] + "_" + df_offense["Pos"]

df_offense["next_Team"] = df_offense.groupby("playerID").Team.transform(lambda x: x.shift(-1))
df_offense["prev_Team"] = df_offense.groupby("playerID").Team.transform(lambda x: x.shift())
df_offense.next_Team = df_offense.set_index("name_dict").next_Team.fillna(
    df_spotrac.reindex(index=df_spotrac.index[::-1]).set_index("name_dict").to_dict()["next_Team"]).tolist()

df_offense["next_Season"] = df_offense.Season + 1

df_offense_2019 = df_offense.loc[df_offense.Season == 2018]

df_offense_2019[list(set(df_offense_2019.columns) - set(["Season", "playerID", "Team", "name", "next_Team", "Pos"]))] = np.nan
df_offense_2019["Season"] = df_offense_2019["Season"] + 1
df_offense_2019["prev_Team"] = df_offense_2019["Team"]
df_offense_2019["Team"] = df_offense_2019["next_Team"]

df_offense = df_offense.append(df_offense_2019)
#df_offense = df_offense.dropna(axis = 0)

df_total_offense = df_offense.groupby(["next_Season", "next_Team"])[relevant_prev_columns].apply(lambda x: x.sum()).reset_index()

player_prev_merge_cols = ["pass.att", "pass.comp", "totalfumbs",
                    "passyds", "pass.tds", "pass.ints", "rush.att", "rushyds",
                    "rushtds", "recyds", "rec.tds", "recept", "kickret.avg",
                   "kickret.tds", "kick.rets", "punt.rets", "puntret.avg", 
                   "puntret.tds", "rec.twoptm", "rush.twoptm", "pass.twoptm",
                      "fgm", "fga", "fgyds", "xpmade", "xpmissed",
                    "playerID", "next_Season"]

In [10]:
relevant_columns_sched2019 = ["Week", "VisTm", "HomeTm"]

df_sched2019 = pd.read_csv("profootballref_schedule.csv", usecols = relevant_columns_sched2019)
df_sched2019 = df_sched2019.loc[~df_sched2019.Week.str.contains("Pre")]

In [11]:
df_sched2019 = pd.merge(pd.merge(df_sched2019, df_abbreviations, left_on = "VisTm", right_on = "Tm").drop(["VisTm", "Tm"],
                                                                                                          axis = 1),
         df_abbreviations,
        left_on = "HomeTm",
        right_on = "Tm")[["Team_x", "Team_y"]]

df_sched2019 = df_sched2019.append(df_sched2019.rename({"Team_x": "Team_y", "Team_y": "Team_x"}))

df_sched2019.rename({"Team_x": "opp", "Team_y": "Team"})

df_sched2019.columns = ["opp", "Team"]

df_sched2019["Season"] = 2019



In [12]:
df_offense = pd.merge(df_offense, df_offense[player_prev_merge_cols], left_on = ["playerID", "Season"],
                      right_on = ["playerID", "next_Season"],
                     suffixes = ["", "_player_prev"])

df_offense = pd.merge(df_offense, df_total_offense, left_on = ["Team", "Season"], right_on = ["next_Team", "next_Season"],
                     suffixes = ["", "_new_Team_prev"])

df_offense = pd.merge(df_offense, df_total_offense, left_on = ["prev_Team", "Season"], right_on = ["next_Team", "next_Season"],
                     suffixes = ["", "_old_Team_prev"])

for col in relevant_prev_columns:
    df_offense[col + "_Team_prev_delta"] = df_offense[col + "_new_Team_prev"] - df_offense[col + "_old_Team_prev"]
    
    
df_offense["exp"] = df_offense.groupby("playerID").Season.transform(lambda x: x - min(x))


relevant_sched_columns  = ["Team",  "passyds", "pass.tds", "rushyds",
              "rushtds", "kickret.tds", "puntret.tds", "fgm", 'fga', 'xpmissed',
              "xpmade", "totalfumbs", "pass.ints"]


df_sched = df_train.groupby(["Season", "opp"])[["Team", "game.id"]].apply(lambda x: x.drop_duplicates("game.id")).reset_index().drop(["level_2", "game.id"], axis = 1)

df_prevSched =  pd.merge(df_sched,
         (df_train.groupby(["Season", "Team"])[relevant_sched_columns].sum()/16).reset_index(),
        left_on = ["Season", "opp"], right_on = ["Season", "Team"]).groupby(["opp","Season"]).sum().reset_index()
 
df_prevSched["Season"] += 1    

df_sched = df_sched.append(df_sched2019)

df_sched.Season -= 1

df_currSched =  pd.merge(df_sched,
         (df_train.groupby(["Season", "Team"])[relevant_sched_columns].sum()/16).reset_index(),
        left_on = ["Season", "opp"], right_on = ["Season", "Team"]).groupby(["opp","Season"]).sum().reset_index()
 
df_currSched["Season"] += 1     

df_offense = pd.merge(df_offense,
                      df_prevSched,
         left_on = ["Team", "Season"],
         right_on = ["opp", "Season"],
         suffixes = ["", "_curr_sched"])
    
    
df_offense = pd.merge(df_offense, 
        df_currSched,
         left_on = ["prev_Team", "Season"],
         right_on = ["opp", "Season"],
         suffixes = ["", "_past_sched"])

df_defense = pd.merge(df_defense,
                     df_currSched,
         left_on = ["Team", "Season"],
         right_on = ["opp", "Season"],
         suffixes = ["", "_curr_sched"])

df_defense = pd.merge(df_defense,
                     df_prevSched,
         left_on = ["Team", "Season"],
         right_on = ["opp", "Season"],
         suffixes = ["", "_past_sched"])

for col in list(set(relevant_sched_columns) - set(["Team"])):
    df_offense[col + "_delta_sched"] = df_offense[col + "_past_sched"] - df_offense[col + "_curr_sched"]
    df_defense[col + "_delta_sched"] = df_defense[col + "_past_sched"] - df_defense[col]

### Determing Points for each Season

In [13]:
df_offense["points"] = (df_offense["passyds"] * 0.025 + 4 * df_offense["pass.tds"] - 2 * df_offense["pass.ints"]
                       + 0.1 * df_offense["rushyds"] + 6 * df_offense["rushtds"] + 0.1 * df_offense["recyds"] +
                        6 * df_offense["rec.tds"] + 6 * df_offense["kickret.tds"] + 6 * df_offense["puntret.tds"]
                       + 2 * df_offense["rec.twoptm"] + 2 * df_offense["rush.twoptm"] + 2 * df_offense["pass.twoptm"]
                       + 4 * df_offense["fgm"] - df_offense["fga"] + 
                        df_offense["xpmade"] - 2 * df_offense["fumbslost"])

In [14]:
df_offense.drop(['name_dict',
 'next_Team',
 'prev_Team',
 'next_Season',
'opp',
                 "playerID",
 'next_Team_new_Team_prev',
 'next_Team_old_Team_prev',
  'opp_past_sched',
 'next_Season_player_prev',
 'pass.att',
 'pass.comp',
 'fumbslost',
 'totalfumbs',
 'passyds',
 'pass.tds',
 'pass.ints',
 'rush.att',
 'rushyds',
 'rushtds',
 'recyds',
 'rec.tds',
 'recept',
 'kickret.avg',
 'kickret.tds',
 'kick.rets',
 'punt.rets',
 'puntret.avg',
 'puntret.tds',
 'rec.twoptm',
 'rush.twoptm',
 'pass.twoptm',
 'fgm',
 'fga',
 'fgyds',
 'xpmade',
 'xpmissed',], axis = 1, inplace = True)

In [15]:
df_defense_scores = (df_defense["TD"]*6 + df_defense["turnovers"] * 2 + df_defense["Sacks"] + df_defense["Sfty"] * 2 +
                    (df_defense["0pts"] + df_defense["YA100"]) * 5 + 
                    df_defense['1_6pts'] * 4 +
                    (df_defense["7_13pts"] + df_defense["YA199"]) * 3 +
                     df_defense["YA299"] * 2 +
                    df_defense["14_17pts"] +
                    (df_defense["YA399"] + df_defense["28_34pts"]) * (-1) +
                    (df_defense["YA449"] + df_defense["35_45pts"]) * (-3) +
                    (df_defense["YA499"] + df_defense["45pts"]) * (-5) +
                    (df_defense["YA549"]) * (-6) +
                    (df_defense["YA550"]) * (-7)).to_frame()


df_defense_scores.columns = ["points"]

df_defense_scores["Team"] = df_defense["Team"]
df_defense_scores["Season"] = df_defense["Season"] - 1

df_defense_scores_2018 = df_defense_scores.loc[df_defense_scores.Season == 2017]
df_defense_scores_2018["Season"] = df_defense_scores_2018["Season"] + 1
df_defense_scores_2018["points"] = np.nan

df_defense_scores = df_defense_scores.append(df_defense_scores_2018)

df_defense = pd.merge(df_defense, df_defense_scores, suffixes = ["_prev", ""])

df_defense.Season = df_defense.Season + 1

df_offense = pd.get_dummies(df_offense, columns = ["exp"])

### Making Predictions

In [16]:
df_offense_train = df_offense.loc[~df_offense.points.isna()]
df_offense_hold = df_offense.loc[df_offense.points.isna()]

df_defense_train = df_defense.loc[~df_defense.points.isna()]
df_defense_hold = df_defense.loc[df_defense.points.isna()]

In [17]:
X_train_QB = df_offense_train.loc[df_offense_train.Pos == "QB"].drop("points", axis = 1)
y_train_QB = df_offense_train.loc[df_offense_train.Pos == "QB"].points

X_train_RB = df_offense_train.loc[df_offense_train.Pos == "RB"].drop("points", axis = 1)
y_train_RB = df_offense_train.loc[df_offense_train.Pos == "RB"].points


X_train_WR = df_offense_train.loc[df_offense_train.Pos == "WR"].drop("points", axis = 1)

y_train_WR = df_offense_train.loc[df_offense_train.Pos == "WR"].points


X_train_TE = df_offense_train.loc[df_offense_train.Pos == "TE"].drop("points", axis = 1)
y_train_TE = df_offense_train.loc[df_offense_train.Pos == "TE"].points


X_train_K = df_offense_train.loc[df_offense_train.Pos == "K"].drop("points", axis = 1)
y_train_K = df_offense_train.loc[df_offense_train.Pos == "K"].points

X_train_D = df_defense_train.drop("points", axis = 1)
y_train_D = df_defense_train["points"]

In [20]:
def rmse(actual, predict):

    return sum(abs(actual - predict))/len(predict)

rmse_score = make_scorer(rmse, greater_is_better = False)

def featureModelSelection(X_train, y_train, Pos):
    f = open('results_' + Pos + '.txt', 'w')
    
    if "Pos" in X_train.columns:
        X_train = X_train.drop(["Team", "Pos", "name"], axis = 1)
    else:
        X_train = X_train.drop(["Team", "opp", "opp_past_sched"], axis = 1)

    models = [LinearRegression(), Ridge(), ElasticNet(), Lasso()]
    
    params = [[{"n_features_to_select": np.arange(1, len(X_train.columns) + 1)}], 
              [{"estimator__alpha": [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000],
         "n_features_to_select": np.arange(1, len(X_train.columns) + 1)}],
              [{"estimator__alpha": [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000],
                "estimator__l1_ratio": [0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99, 0.999],
         "n_features_to_select": np.arange(1, len(X_train.columns) + 1)}],
             [{"estimator__alpha": [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000],"n_features_to_select": np.arange(1, len(X_train.columns) + 1)}
             ]]

    for m in range(len(models)):
        rfe = RFE(estimator=models[m], step=1)
        
        gs = GridSearchCV(rfe, params[m], n_jobs = -1, scoring = rmse_score).fit( X_train, y_train)
        #score = np.mean(cross_val_score(rfe, X_train, y_train, 
        #                  n_jobs=-1, cv=5))
                    
        print("\nModel: " + type(models[m]).__name__)
        print("\nScore: " + str(gs.best_score_))
        print("\nParams: " + str(gs.best_params_))
        
        
        
    
        print("\nModel: " + type(models[m]).__name__, file =f )
        print("\nScore: " + str(gs.best_score_), file = f)
        print("\nParams: " + str(gs.best_params_), file = f)

    
    f.close()
        

In [21]:
featureModelSelection(X_train_QB, y_train_QB, "QB")

featureModelSelection(X_train_RB, y_train_RB, "RB")

featureModelSelection(X_train_WR, y_train_WR, "WR")

featureModelSelection(X_train_TE, y_train_TE, "TE")

featureModelSelection(X_train_K, y_train_K, "K")

featureModelSelection(X_train_D, y_train_D, "D")


Model: LinearRegression

Score: -47.092628634494126

Params: {'n_features_to_select': 23}

Model: Ridge

Score: -45.23605624456509

Params: {'estimator__alpha': 10000, 'n_features_to_select': 44}

Model: ElasticNet

Score: -44.509904438505714

Params: {'estimator__alpha': 1000, 'estimator__l1_ratio': 0.001, 'n_features_to_select': 9}

Model: Lasso

Score: -44.6305494029434

Params: {'estimator__alpha': 100, 'n_features_to_select': 5}

Model: LinearRegression

Score: -41.4602537779972

Params: {'n_features_to_select': 69}

Model: Ridge

Score: -39.107918541277904

Params: {'estimator__alpha': 10000, 'n_features_to_select': 44}


KeyboardInterrupt: 

array([ 73.18039872, 126.17439531,  43.77063301, 136.26995401,
        43.15478627,  88.66026237, -12.92939804, 144.48943905,
       174.55754898,  -1.53202123,  13.64191488,   9.53689192,
        28.73038361, 189.90272096, 145.28542542,  35.95195566,
       199.64545591, 149.53275542, 195.48655727, -18.56353933,
       175.64064846,  10.33360807,  82.6482293 , 124.07806591,
       157.6382527 ,  45.7686818 , 214.65648919,   8.09368109,
       126.5958879 ,  59.86527464, 137.12765152, 180.04126097,
         8.39594255, 118.06744816, 148.44802078,  37.96097239,
        58.91812901, 103.06444868, 108.21149935,  34.53479693,
       128.01843682,  32.64831376, 231.02298349,  36.25124735,
         9.80202237, 147.48674538,  22.70716032,  50.9907024 ,
       180.35976957,  97.1023768 , 129.19472758,  38.59536373,
       -14.24809758, 264.47022495, 160.67627805,  29.09125155,
         4.27036084, 270.54119858, 180.61929225,  39.18390384,
       185.89023483, -52.01212598, 233.53883572,  46.57

In [37]:
qB = df_offense_hold.loc[df_offense_hold.Pos == "QB"]

In [39]:
qB.points = LinearRegression().fit(X_train_QB.drop(["Team", "Pos", "name"], axis = 1), y_train_QB).predict(
    df_offense_hold.loc[df_offense_hold.Pos == "QB"].drop(["Team", "Pos", "name", "points"], axis = 1))

In [22]:
X_train_QB = df_offense_train.loc[df_offense_train.Pos == "QB"].drop(["points",
       'recyds_player_prev', 'rec.tds_player_prev', 'recept_player_prev',
       'kickret.avg_player_prev', 'kickret.tds_player_prev',
       'kick.rets_player_prev', 'punt.rets_player_prev',
       'puntret.avg_player_prev', 'puntret.tds_player_prev',
       'rec.twoptm_player_prev',  'fgm_player_prev', 'fga_player_prev',
       'xpmade_player_prev', 'xpmissed_player_prev'], axis = 1)


y_train_QB = df_offense_train.loc[df_offense_train.Pos == "QB"].points

X_train_RB = df_offense_train.loc[df_offense_train.Pos == "RB"].drop(["points",
       'pass.att_player_prev', 
       'pass.comp_player_prev',
       'passyds_player_prev', 'pass.tds_player_prev', 'pass.ints_player_prev',
       'fgm_player_prev', 'fga_player_prev',
       'xpmade_player_prev', 'xpmissed_player_prev'], axis = 1)

y_train_RB = df_offense_train.loc[df_offense_train.Pos == "RB"].points


X_train_WR = df_offense_train.loc[df_offense_train.Pos == "WR"].drop(["points",
       'pass.att_player_prev',
       'pass.comp_player_prev',
       'passyds_player_prev', 'pass.tds_player_prev', 'pass.ints_player_prev',
       'fgm_player_prev', 'fga_player_prev',
       'xpmade_player_prev', 'xpmissed_player_prev',
        'rush.att_player_prev', 'rushyds_player_prev', 'rushtds_player_prev'], axis = 1)

y_train_WR = df_offense_train.loc[df_offense_train.Pos == "WR"].points


X_train_TE = df_offense_train.loc[df_offense_train.Pos == "TE"].drop(["points",
       'pass.att_player_prev',
       'pass.comp_player_prev',
       'passyds_player_prev', 'pass.tds_player_prev', 'pass.ints_player_prev',
       'fgm_player_prev', 'fga_player_prev',
       'xpmade_player_prev', 'xpmissed_player_prev',
        'rush.att_player_prev', 'rushyds_player_prev', 'rushtds_player_prev'], axis = 1)
y_train_TE = df_offense_train.loc[df_offense_train.Pos == "TE"].points


X_train_K = df_offense_train.loc[df_offense_train.Pos == "K"].drop(["points",
       'pass.att_player_prev',
       'pass.comp_player_prev', 'totalfumbs_player_prev',
       'passyds_player_prev', 'pass.tds_player_prev', 'pass.ints_player_prev',
       'rush.att_player_prev', 'rushyds_player_prev', 'rushtds_player_prev',
       'recyds_player_prev', 'rec.tds_player_prev', 'recept_player_prev',
       'kickret.avg_player_prev', 'kickret.tds_player_prev',
       'kick.rets_player_prev', 'punt.rets_player_prev',
       'puntret.avg_player_prev', 'puntret.tds_player_prev',
       'rec.twoptm_player_prev', 'rush.twoptm_player_prev',
       'pass.twoptm_player_prev'], axis = 1)

y_train_K = df_offense_train.loc[df_offense_train.Pos == "K"].points

X_train_D = df_defense_train.drop("points", axis = 1)
y_train_D = df_defense_train["points"]

In [23]:
print('\nMean QB Linear Regression CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(LinearRegression(),
                                      X_train_QB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_QB))))
print('Mean QB Ridge CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Ridge(),
                                      X_train_QB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_QB))))
print('Mean QB Lasso CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Lasso(),
                                      X_train_QB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_QB))))

print('Mean QB Elastic Net CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(ElasticNet(),
                                      X_train_QB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_QB))))

print('Mean QB RandomForestRegressor CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(RandomForestRegressor(),
                                      X_train_QB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_QB))))


print('Mean QB XGB CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(XGBRegressor(),
                                      X_train_QB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_QB))))



Mean QB Linear Regression CV Score: 0.563
Mean QB Ridge CV Score: 0.564
Mean QB Lasso CV Score: 0.590
Mean QB Elastic Net CV Score: 0.590
Mean QB RandomForestRegressor CV Score: 0.539
Mean QB XGB CV Score: 0.607


In [24]:
X_train_QB.drop(["name", "Season", "Pos", "Team"], axis = 1).columns

Index(['pass.att_player_prev', 'pass.comp_player_prev',
       'totalfumbs_player_prev', 'passyds_player_prev', 'pass.tds_player_prev',
       'pass.ints_player_prev', 'rush.att_player_prev', 'rushyds_player_prev',
       'rushtds_player_prev', 'rush.twoptm_player_prev',
       'pass.twoptm_player_prev', 'fgyds_player_prev',
       'next_Season_new_Team_prev', 'pass.att_new_Team_prev',
       'pass.comp_new_Team_prev', 'totalfumbs_new_Team_prev',
       'passyds_new_Team_prev', 'pass.tds_new_Team_prev',
       'pass.ints_new_Team_prev', 'rush.att_new_Team_prev',
       'rushyds_new_Team_prev', 'rushtds_new_Team_prev',
       'recyds_new_Team_prev', 'rec.tds_new_Team_prev', 'recept_new_Team_prev',
       'next_Season_old_Team_prev', 'pass.att_old_Team_prev',
       'pass.comp_old_Team_prev', 'totalfumbs_old_Team_prev',
       'passyds_old_Team_prev', 'pass.tds_old_Team_prev',
       'pass.ints_old_Team_prev', 'rush.att_old_Team_prev',
       'rushyds_old_Team_prev', 'rushtds_old_Team_pr

In [25]:
print('\nMean RB Linear Regression CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(LinearRegression(),
                                      X_train_RB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_RB))))
print('Mean RB Ridge CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Ridge(),
                                      X_train_RB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_RB))))
print('Mean RB Lasso CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Lasso(),
                                      X_train_RB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_RB))))

print('Mean RB Elastic Net CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(ElasticNet(),
                                      X_train_RB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_RB))))

print('Mean RB RandomForestRegressor CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(RandomForestRegressor(),
                                      X_train_RB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_RB))))


print('Mean RB XGB CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(XGBRegressor(),
                                      X_train_RB.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_RB))))



Mean RB Linear Regression CV Score: 0.327
Mean RB Ridge CV Score: 0.353
Mean RB Lasso CV Score: 0.397
Mean RB Elastic Net CV Score: 0.397
Mean RB RandomForestRegressor CV Score: 0.355
Mean RB XGB CV Score: 0.400


In [26]:
print('\nMean WR Linear Regression CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(LinearRegression(),
                                      X_train_WR.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_WR))))
print('Mean WR Ridge CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Ridge(),
                                      X_train_WR.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_WR))))
print('Mean WR Lasso CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Lasso(),
                                      X_train_WR.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_WR))))

print('Mean WR Elastic Net CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(ElasticNet(),
                                      X_train_WR.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_WR))))

print('Mean WR RandomForestRegressor CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(RandomForestRegressor(),
                                      X_train_WR.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_WR))))


print('Mean WR XGB CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(XGBRegressor(),
                                      X_train_WR.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_WR))))


Mean WR Linear Regression CV Score: 0.433
Mean WR Ridge CV Score: 0.433
Mean WR Lasso CV Score: 0.447
Mean WR Elastic Net CV Score: 0.443
Mean WR RandomForestRegressor CV Score: 0.387
Mean WR XGB CV Score: 0.454


In [27]:
print('\nMean TE Linear Regression CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(LinearRegression(),
                                      X_train_TE.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_TE))))
print('Mean TE Ridge CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Ridge(),
                                      X_train_TE.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_TE))))
print('Mean TE Lasso CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Lasso(),
                                      X_train_TE.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_TE))))

print('Mean TE Elastic Net CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(ElasticNet(),
                                      X_train_TE.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_TE))))

print('Mean TE RandomForestRegressor CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(RandomForestRegressor(),
                                      X_train_TE.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_TE))))


print('Mean TE XGB CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(XGBRegressor(),
                                      X_train_TE.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_TE))))


Mean TE Linear Regression CV Score: -13169145841398.760
Mean TE Ridge CV Score: 0.426
Mean TE Lasso CV Score: 0.456
Mean TE Elastic Net CV Score: 0.454
Mean TE RandomForestRegressor CV Score: 0.322
Mean TE XGB CV Score: 0.380


In [28]:
print('\nMean K Linear Regression CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(LinearRegression(),
                                      X_train_K.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_K))))
print('Mean K Ridge CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Ridge(),
                                      X_train_K.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_K))))
print('Mean K Lasso CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Lasso(),
                                      X_train_K.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_K))))

print('Mean K Elastic Net CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(ElasticNet(),
                                      X_train_K.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_K))))

print('Mean K RandomForestRegressor CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(RandomForestRegressor(),
                                      X_train_K.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_K))))


print('Mean K XGB CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(XGBRegressor(),
                                      X_train_K.drop(["name", "Season", "Pos", "Team"], axis = 1),
                                      y_train_K))))


Mean K Linear Regression CV Score: -0.167
Mean K Ridge CV Score: -0.152
Mean K Lasso CV Score: 0.001
Mean K Elastic Net CV Score: 0.011
Mean K RandomForestRegressor CV Score: 0.257
Mean K XGB CV Score: 0.325


In [29]:
print('\nMean D Linear Regression CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(LinearRegression(),
                                      X_train_D.drop(["Season", "Team"], axis = 1),
                                      y_train_D))))
print('Mean D Ridge CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Ridge(),
                                      X_train_D.drop(["Season", "Team"], axis = 1),
                                      y_train_D))))
print('Mean D Lasso CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(Lasso(),
                                      X_train_D.drop(["Season", "Team"], axis = 1),
                                      y_train_D))))

print('Mean D Elastic Net CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(ElasticNet(),
                                      X_train_D.drop(["Season", "Team"], axis = 1),
                                      y_train_D))))

print('Mean D RandomForestRegressor CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(RandomForestRegressor(),
                                      X_train_D.drop(["Season", "Team"], axis = 1),
                                      y_train_D))))


print('Mean D XGB CV Score: {0:.3f}'
      .format(np.mean(cross_val_score(XGBRegressor(),
                                      X_train_D.drop(["Season", "Team"], axis = 1),
                                      y_train_D))))


Mean D Linear Regression CV Score: -0.003
Mean D Ridge CV Score: 0.001
Mean D Lasso CV Score: 0.067
Mean D Elastic Net CV Score: 0.069
Mean D RandomForestRegressor CV Score: -0.060
Mean D XGB CV Score: -0.149
