In [5]:
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer, Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

from sklearn.preprocessing import StandardScaler

data = pd.read_pickle("data/main.df")
data = data.sample(frac=1).reset_index(drop=True)
# One hot encode position - according to what they can play
data["F_pos"] = data["Pos"].apply(lambda x: "F" in x)
data["C_pos"] = data["Pos"].apply(lambda x: "C" in x)
data["G_pos"] = data["Pos"].apply(lambda x: "F" in x)
data = data.drop("Pos", axis=1)

# Address missing values
na_zero_cols = ["TS%", "3P%", "3PAr", "FTr", "TOV%", "FG%", "2P%", "eFG%", "FT%"]
for c in na_zero_cols: data[c].fillna(0, inplace=True)
    
# Add some per game features here
pg_features = ["MP", "3P", "3PA", "2P", "2PA", "FT", "FTA", "ORB", "DRB",'TRB','AST','STL','BLK','TOV','PF','PTS']
for feature in pg_features:
    data[feature] = data.apply(lambda x: x[feature]/x["G"], axis=1)

target = "salary"
x_cols = ["Age", "G", "GS", "MP", "PER", "TS%", '3PAr','FTr','ORB%','DRB%','TRB%','AST%','STL%','BLK%',
          'TOV%','USG%','OWS','DWS','WS','WS/48','OBPM','DBPM','BPM','VORP','FG','FGA','FG%','3P','3PA','3P%',
          '2P','2PA','2P%','eFG%','FT','FTA','FT%','ORB','DRB','TRB','AST','STL','BLK','TOV','PF','PTS','age',
          'ows_48','dws_48','shot','team_mar','team_mp','team_gm','year_3par','team_ts','tm_usg','tm_ts_w_o_plyr',
          'reb_vers','defense','val_shot','offense','mpg_int','raw_spm','raw_contrib','tm_sum','tm_adj','bpm_2',
          'stderr','contrib','vorp_2','reb_vers_2','val_shot_2','offense_2','defense_2','raw_obpm','contrib_2',
          'tm_ortg','tm_sum_2','tm_adj_2','tm_drtg','obpm_2','ostderr','ocontrib','ovorp','dbpm_2','dstderr',
          'dcontrib','dvorp','sum_spm','bpm_3','contrib_3','vorp_3','vorp_gm','o_bpm','ocontrib_2','ovorp_2',
          'ovorp_gm','d_bpm','dcontrib_2','dvorp_2','dvorp_gm','exp_bpm','truetalentbpm',
          'exp_min','truetalentvorp','truetimevorp','worp','o_worp','d_worp','height','weight',
          'adjusted_worp','estimated_position','bbref_pos','age_on_feb_1','yrs_experience', 
          "F_pos", "G_pos", "C_pos"]

# Scale the columns - not including binary columns 
scale_cols = [col for col in x_cols if col not in ["F_pos", "G_pos", "C_pos"]]
scaler = StandardScaler()
data[x_cols] = scaler.fit_transform(data[x_cols])
X, Y = data[x_cols], data[target]

In [6]:
# Do recursive feature elimination
N = len(x_cols)
model = Sequential()
model.add(Dense(N, input_dim=N, kernel_initializer='normal', activation='sigmoid'))
model.add(Dense(int(np.sqrt(N)), kernel_initializer='normal', activation='sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(1, kernel_initializer='normal'))

# Compile model
model.compile(loss='mean_squared_error', optimizer='adam')

min_features_to_select = 1  # Minimum number of features to consider
rfecv = RFECV(estimator=model, step=1, cv=StratifiedKFold(2),
              scoring='mean_square_error',
              min_features_to_select=min_features_to_select)
rfecv.fit(X, Y)

print("Optimal number of features : %d" % rfecv.n_features_)

# Plot number of features VS. cross-validation scores
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score (nb of correct classifications)")
plt.plot(range(min_features_to_select,
               len(rfecv.grid_scores_) + min_features_to_select),
         rfecv.grid_scores_)
plt.show()

AttributeError: 'Sequential' object has no attribute '_get_tags'