# Fantasy Football Point Machine Learning Model

In [1]:
# initialize libraries
from urllib.parse import quote

import numpy as np
import pandas as pd
# from pathlib import Path
from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import  StandardScaler
from sklearn.linear_model import LinearRegression
# from sklearn.metrics import balanced_accuracy_score
from sqlalchemy import create_engine
from config import db_password

In [2]:
# local host sql connection
# Percent-encode the password before embedding it in the URL so characters such
# as "@", ":", "/" or "%" cannot be mistaken for URL delimiters/escapes when
# SQLAlchemy parses the connection string.
# NOTE(review): assumes config.db_password holds the raw (not already
# URL-encoded) password — confirm.
db_string = f"postgresql://postgres:{quote(db_password, safe='')}@localhost:5433/NFL"
engine = create_engine(db_string)

In [3]:
# load in datafile

# tables named clean_*: these frames are later split into train/test sets
passing_df, receiving_df, rushing_df = (
    pd.read_sql_table(table_name, con=engine)
    for table_name in ('clean_passing', 'clean_receiving', 'clean_rushing')
)

# tables named *2021: the fitted models are later applied to these frames
current_passing_df, current_receiving_df, current_rushing_df = (
    pd.read_sql_table(table_name, con=engine)
    for table_name in ('passing2021', 'receiving2021', 'rushing2021')
)

In [4]:
# define function for linear regression
def lin_reg(X_train, y_train):
    """Fit a scikit-learn ``LinearRegression`` on the given training data.

    Parameters
    ----------
    X_train : training features passed to ``LinearRegression.fit``.
    y_train : training target passed to ``LinearRegression.fit``.

    Returns
    -------
    The fitted ``LinearRegression`` instance.
    """
    # fit() returns the estimator itself, so the fitted model can be returned directly
    return LinearRegression().fit(X_train, y_train)

In [5]:
# # define function for balance accuracy score
# def bal_acc(model, X_test, y_test):
#     y_pred = model.predict(X_test)
#     print(balanced_accuracy_score(y_test, y_pred))

In [6]:
# additional data transforms as necessary
# discard every row that has at least one missing value in the training tables
passing_df, receiving_df, rushing_df = (
    frame.dropna(axis=0, how='any')
    for frame in (passing_df, receiving_df, rushing_df)
)

In [7]:
def _percent_to_float(column):
    """Convert a column of strings with a trailing '%' to floats.

    A column that is already numeric is returned unchanged, so this cell can
    be re-run without ``.str`` failing on values converted by an earlier run.
    """
    if pd.api.types.is_numeric_dtype(column):
        return column
    return column.str.rstrip('%').astype('float')


# 'ctch' is stored as text ending in '%'; the regression needs numeric input
receiving_df['ctch'] = _percent_to_float(receiving_df['ctch'])
current_receiving_df['ctch'] = _percent_to_float(current_receiving_df['ctch'])

In [8]:
# define X and y

# Columns removed from every feature matrix: the target ("points") plus the
# player-identifying columns.
NON_FEATURE_COLUMNS = ["points", "player", "pos", "player_additional"]
# The 2021 tables additionally have an "rk" column (presumably a rank) that is
# removed as well.
CURRENT_NON_FEATURE_COLUMNS = ["rk"] + NON_FEATURE_COLUMNS

# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas deprecated and which raises a TypeError on pandas 2.x.
y_passing = passing_df["points"]
X_passing = passing_df.drop(columns=NON_FEATURE_COLUMNS)

y_receiving = receiving_df["points"]
X_receiving = receiving_df.drop(columns=NON_FEATURE_COLUMNS)

y_rushing = rushing_df["points"]
X_rushing = rushing_df.drop(columns=NON_FEATURE_COLUMNS)


X_passing_current = current_passing_df.drop(columns=CURRENT_NON_FEATURE_COLUMNS)
X_receiving_current = current_receiving_df.drop(columns=CURRENT_NON_FEATURE_COLUMNS)
X_rushing_current = current_rushing_df.drop(columns=CURRENT_NON_FEATURE_COLUMNS)

  after removing the cwd from sys.path.
  import sys
  # Remove the CWD from sys.path while we load stuff.
  del sys.path[0]
  
  from ipykernel import kernelapp as app


In [9]:
# # additional data transforms as necessary
# X_passing = X_passing.dropna()
# X_receiving = X_receiving.dropna()
# X_rushing = X_rushing.dropna()

In [10]:
# split train and test

# Fixed seed: train_test_split shuffles randomly, so without it the split (and
# therefore every score printed below) changes on each Restart & Run All.
SPLIT_SEED = 42

X_train_passing, X_test_passing, y_train_passing, y_test_passing = train_test_split(
    X_passing, y_passing, random_state=SPLIT_SEED
)


X_train_receiving, X_test_receiving, y_train_receiving, y_test_receiving = train_test_split(
    X_receiving, y_receiving, random_state=SPLIT_SEED
)


X_train_rushing, X_test_rushing, y_train_rushing, y_test_rushing = train_test_split(
    X_rushing, y_rushing, random_state=SPLIT_SEED
)

In [11]:
# # Create a StandardScaler instances
# scaler = StandardScaler()

# # Fit the StandardScaler
# X_scaler_passing = scaler.fit(X_train_passing)

# # Scale the data
# X_train_scaled_passing = X_scaler_passing.transform(X_train_passing)
# X_test_scaled_passing = X_scaler_passing.transform(X_test_passing)
    
 

In [12]:
# fit the linear regression on the passing training split and print its score
# on the held-out test split
# NOTE(review): the saved run printed a score of about 0.001 here, far below the
# receiving (~0.57) and rushing (~0.54) models — worth investigating.
passing_model = lin_reg(X_train=X_train_passing, y_train=y_train_passing)
passing_score = passing_model.score(X_test_passing, y_test_passing)
print(passing_score)

0.0009905038526140109




In [13]:
# fit the linear regression on the receiving training split and print its score
# on the held-out test split
receiving_model = lin_reg(X_train=X_train_receiving, y_train=y_train_receiving)
receiving_score = receiving_model.score(X_test_receiving, y_test_receiving)
print(receiving_score)

0.5708333100875355




In [14]:
# fit the linear regression on the rushing training split and print its score
# on the held-out test split
rushing_model = lin_reg(X_train=X_train_rushing, y_train=y_train_rushing)
rushing_score = rushing_model.score(X_test_rushing, y_test_rushing)
print(rushing_score)

0.537679354080911




In [18]:
# apply models to current stats
def _prediction_frame(stats_df, predicted_points):
    """Pair each row of ``stats_df`` with its predicted points.

    Returns a new DataFrame whose 'Player' and 'Position' columns are copied
    from the 'player' and 'pos' columns of ``stats_df`` and whose
    'Points predicted' column holds ``predicted_points`` (one value per row,
    in row order).
    """
    return pd.DataFrame({
        'Player': stats_df['player'],
        'Position': stats_df['pos'],
        'Points predicted': predicted_points,
    })


# NOTE(review): assumes each X_*_current frame has the same feature columns, in
# the same order, as the frame its model was trained on — confirm.
y_passing_pred = passing_model.predict(X_passing_current)
passing_pred_df = _prediction_frame(current_passing_df, y_passing_pred)

y_receiving_pred = receiving_model.predict(X_receiving_current)
receiving_pred_df = _prediction_frame(current_receiving_df, y_receiving_pred)

y_rushing_pred = rushing_model.predict(X_rushing_current)
rushing_pred_df = _prediction_frame(current_rushing_df, y_rushing_pred)



In [19]:
# aggregate duplicates
# stack the passing, receiving and rushing prediction tables into one long table
pred_df = pd.concat(
    [passing_pred_df, receiving_pred_df, rushing_pred_df],
    ignore_index=True,
)

# collapse to one row per 'Player' value by summing that player's predicted
# points; the 'Position' column is not carried over
# NOTE(review): rows are matched on the exact name string, so distinct players
# sharing a name would be merged — confirm this is acceptable.
pred_df = pred_df.groupby('Player')['Points predicted'].sum().reset_index()

pred_df.head(10)


Unnamed: 0,Player,Points predicted
0,A.J. Brown,205.913791
1,A.J. Green,108.078783
2,AJ Dillon,148.027615
3,Aaron Jones,136.947152
4,Aaron Rodgers*+,203.011031
5,Adam Humphries,53.208331
6,Adam Prentice,14.592589
7,Adam Shaheen,24.111431
8,Adam Thielen,123.552444
9,Adam Trautman,60.788759


In [20]:
# write output

# saved relative to the notebook's working directory; the DataFrame index is
# written as the first (unnamed) column because to_csv keeps it by default
output_path = 'predictions.csv'
pred_df.to_csv(output_path)