In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import sklearn as skl

In [2]:
#Create file paths
csv_path1 = Path("NFL_Fantasy_Files/FantasyPros_2019.csv")
csv_path2 = Path("NFL_Fantasy_Files/FantasyPros_2020.csv")
csv_path3 = Path("NFL_Fantasy_Files/FantasyPros_2021.csv")
csv_path4 = Path("NFL_Fantasy_Files/FantasyPros_2022.csv")
csv_path5 = Path("NFL_Fantasy_Files/FantasyPros_2023.csv")

#Create DataFrames
df1 = pd.read_csv(csv_path1)
df2 = pd.read_csv(csv_path2)
df3 = pd.read_csv(csv_path3)
df4 = pd.read_csv(csv_path4)
df5 = pd.read_csv(csv_path5)

# Add "year" feature
df1["Year"] = 2019
df2["Year"] = 2020
df3["Year"] = 2021
df4["Year"] = 2022
df5["Year"] = 2023

# Merge DataFrames
all_df = pd.concat([df1, df2, df3, df4, df5], axis = 0)
print(all_df.shape)
all_df.head()


(3557, 25)


Unnamed: 0,#,Player,Pos,Team,1,2,3,4,5,6,...,12,13,14,15,16,17,AVG,TTL,Year,18
0,1,Lamar Jackson,QB,BAL,33.6,30.9,21.3,26.5,14.4,30.6,...,36.3,22.3,20.8,37.1,29.8,-,28.1,421.7,2019,
1,2,Christian McCaffrey,RB,SF,32.9,5.3,24.8,23.9,41.7,17.7,...,25.3,10.2,13.5,29.5,17.3,15.8,22.2,355.2,2019,
2,3,Dak Prescott,QB,DAL,33.4,28.7,23.5,8.6,26.2,18.2,...,7.9,23.7,17.5,17.7,11.3,29.6,21.8,348.8,2019,
3,4,Jameis Winston,QB,CLE,10.1,13.2,26.5,30.3,17.5,17.1,...,26.3,10.7,37.7,33.7,15.0,16.5,21.0,335.4,2019,
4,5,Russell Wilson,QB,PIT,16.6,24.2,41.3,14.3,29.9,28.9,...,10.5,17.9,11.6,19.3,11.0,20.2,20.9,333.6,2019,


In [3]:
# Replace all missing values in column 18 with zero
all_df["18"].fillna(0, inplace=True)

# Replace anything in DF other than an integer with zero
all_df = all_df.replace("-", 0)
all_df = all_df.replace("BYE", 0)


all_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3557 entries, 0 to 706
Data columns (total 25 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   #       3557 non-null   int64  
 1   Player  3557 non-null   object 
 2   Pos     3557 non-null   object 
 3   Team    3557 non-null   object 
 4   1       3557 non-null   object 
 5   2       3557 non-null   object 
 6   3       3557 non-null   object 
 7   4       3557 non-null   object 
 8   5       3557 non-null   object 
 9   6       3557 non-null   object 
 10  7       3557 non-null   object 
 11  8       3557 non-null   object 
 12  9       3557 non-null   object 
 13  10      3557 non-null   object 
 14  11      3557 non-null   object 
 15  12      3557 non-null   object 
 16  13      3557 non-null   object 
 17  14      3557 non-null   object 
 18  15      3557 non-null   object 
 19  16      3557 non-null   object 
 20  17      3557 non-null   object 
 21  AVG     3557 non-null   float64
 22  TTL   

In [4]:
# List all columns
all_df.columns

Index(['#', 'Player', 'Pos', 'Team', '1', '2', '3', '4', '5', '6', '7', '8',
       '9', '10', '11', '12', '13', '14', '15', '16', '17', 'AVG', 'TTL',
       'Year', '18'],
      dtype='object')

In [5]:
# Change necessary column types
all_df[['1', '2', '3', '4', '5', '6', '7', '8',
       '9', '10', '11', '12', '13', '14', '15', '16', '17', '18']] = all_df[['1', '2', '3', '4', '5', '6', '7', '8',
       '9', '10', '11', '12', '13', '14', '15', '16', '17', '18']].astype("float")

In [6]:
# Verify column types have been changed
all_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3557 entries, 0 to 706
Data columns (total 25 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   #       3557 non-null   int64  
 1   Player  3557 non-null   object 
 2   Pos     3557 non-null   object 
 3   Team    3557 non-null   object 
 4   1       3557 non-null   float64
 5   2       3557 non-null   float64
 6   3       3557 non-null   float64
 7   4       3557 non-null   float64
 8   5       3557 non-null   float64
 9   6       3557 non-null   float64
 10  7       3557 non-null   float64
 11  8       3557 non-null   float64
 12  9       3557 non-null   float64
 13  10      3557 non-null   float64
 14  11      3557 non-null   float64
 15  12      3557 non-null   float64
 16  13      3557 non-null   float64
 17  14      3557 non-null   float64
 18  15      3557 non-null   float64
 19  16      3557 non-null   float64
 20  17      3557 non-null   float64
 21  AVG     3557 non-null   float64
 22  TTL   

In [7]:
# View DF
all_df.head()

Unnamed: 0,#,Player,Pos,Team,1,2,3,4,5,6,...,12,13,14,15,16,17,AVG,TTL,Year,18
0,1,Lamar Jackson,QB,BAL,33.6,30.9,21.3,26.5,14.4,30.6,...,36.3,22.3,20.8,37.1,29.8,0.0,28.1,421.7,2019,0.0
1,2,Christian McCaffrey,RB,SF,32.9,5.3,24.8,23.9,41.7,17.7,...,25.3,10.2,13.5,29.5,17.3,15.8,22.2,355.2,2019,0.0
2,3,Dak Prescott,QB,DAL,33.4,28.7,23.5,8.6,26.2,18.2,...,7.9,23.7,17.5,17.7,11.3,29.6,21.8,348.8,2019,0.0
3,4,Jameis Winston,QB,CLE,10.1,13.2,26.5,30.3,17.5,17.1,...,26.3,10.7,37.7,33.7,15.0,16.5,21.0,335.4,2019,0.0
4,5,Russell Wilson,QB,PIT,16.6,24.2,41.3,14.3,29.9,28.9,...,10.5,17.9,11.6,19.3,11.0,20.2,20.9,333.6,2019,0.0


In [8]:
#Store list of player rank, player, and position
players = all_df["Player"]

In [9]:
# Drop unnecessary columns
all_df = all_df.drop(columns=["Player", "Team", "17", "AVG", "18"])
all_df.head()

Unnamed: 0,#,Pos,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,TTL,Year
0,1,QB,33.6,30.9,21.3,26.5,14.4,30.6,23.3,0.0,28.6,33.4,32.8,36.3,22.3,20.8,37.1,29.8,421.7,2019
1,2,RB,32.9,5.3,24.8,23.9,41.7,17.7,0.0,23.5,34.6,20.1,19.1,25.3,10.2,13.5,29.5,17.3,355.2,2019
2,3,QB,33.4,28.7,23.5,8.6,26.2,18.2,21.6,0.0,22.5,26.9,31.6,7.9,23.7,17.5,17.7,11.3,348.8,2019
3,4,QB,10.1,13.2,26.5,30.3,17.5,17.1,0.0,21.3,20.2,20.3,18.8,26.3,10.7,37.7,33.7,15.0,335.4,2019
4,5,QB,16.6,24.2,41.3,14.3,29.9,28.9,15.3,15.7,39.2,17.6,0.0,10.5,17.9,11.6,19.3,11.0,333.6,2019


In [26]:
cleaned_data = all_df.to_csv("FinalProject", header=True, index=True)

In [10]:
# Seperate the features, X,  from the target variable, y
y = all_df['TTL']
X = all_df.drop(columns='TTL')

In [11]:
# Preview the features data
X.head()

Unnamed: 0,#,Pos,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,Year
0,1,QB,33.6,30.9,21.3,26.5,14.4,30.6,23.3,0.0,28.6,33.4,32.8,36.3,22.3,20.8,37.1,29.8,2019
1,2,RB,32.9,5.3,24.8,23.9,41.7,17.7,0.0,23.5,34.6,20.1,19.1,25.3,10.2,13.5,29.5,17.3,2019
2,3,QB,33.4,28.7,23.5,8.6,26.2,18.2,21.6,0.0,22.5,26.9,31.6,7.9,23.7,17.5,17.7,11.3,2019
3,4,QB,10.1,13.2,26.5,30.3,17.5,17.1,0.0,21.3,20.2,20.3,18.8,26.3,10.7,37.7,33.7,15.0,2019
4,5,QB,16.6,24.2,41.3,14.3,29.9,28.9,15.3,15.7,39.2,17.6,0.0,10.5,17.9,11.6,19.3,11.0,2019


In [12]:
# Preview the first five entries for the target variable
y[:5]

0    421.7
1    355.2
2    348.8
3    335.4
4    333.6
Name: TTL, dtype: float64

In [13]:
# Encode "position" using get_dummies
X = pd.get_dummies(X)

In [14]:
# Review the features data
X.head()

Unnamed: 0,#,1,2,3,4,5,6,7,8,9,...,15,16,Year,Pos_DST,Pos_K,Pos_QB,Pos_RB,Pos_S,Pos_TE,Pos_WR
0,1,33.6,30.9,21.3,26.5,14.4,30.6,23.3,0.0,28.6,...,37.1,29.8,2019,False,False,True,False,False,False,False
1,2,32.9,5.3,24.8,23.9,41.7,17.7,0.0,23.5,34.6,...,29.5,17.3,2019,False,False,False,True,False,False,False
2,3,33.4,28.7,23.5,8.6,26.2,18.2,21.6,0.0,22.5,...,17.7,11.3,2019,False,False,True,False,False,False,False
3,4,10.1,13.2,26.5,30.3,17.5,17.1,0.0,21.3,20.2,...,33.7,15.0,2019,False,False,True,False,False,False,False
4,5,16.6,24.2,41.3,14.3,29.9,28.9,15.3,15.7,39.2,...,19.3,11.0,2019,False,False,True,False,False,False,False


In [16]:
# Use sklearn to split dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Create scaler instance
X_scaler = skl.preprocessing.StandardScaler()

# Fit the scaler
X_scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create the Keras Sequential model
nn_model = tf.keras.models.Sequential()

# Add first Dense layer
nn_model.add(tf.keras.layers.Dense(units=1, activation="relu", input_dim=25))

# Add the output layer
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn_model.summary()

NameError: name 'tf' is not defined

In [None]:
# Compile the model together and choose metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Fit the model to the training data
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100)

In [None]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Create a new neural network model with 6 neurons
nn_model2 = tf.keras.models.Sequential()

nn_model2.add(tf.keras.layers.Dense(units=6, activation="relu", input_dim=25))

nn_model2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

nn_model2.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

fit_model = nn_model2.fit(X_train_scaled, y_train, epochs=100)

In [None]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model2.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Create a new neural network model with 12 neurons
nn_model2 = tf.keras.models.Sequential()

nn_model2.add(tf.keras.layers.Dense(units=12, activation="relu", input_dim=25))

nn_model2.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

nn_model2.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

fit_model = nn_model2.fit(X_train_scaled, y_train, epochs=100)

In [None]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model2.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")