# Basic Bandit Implementation of Fantasy Football Reinforcement Learning

Article Link: 

In [None]:
from ImportLeague import getLeague
from ImportData import getTeamData
from ImportData import getPlayerData
import pandas as pd
# from sklearn import linear_model
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn')
from tqdm import tqdm 
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Import Data
myLeague = getLeague()
teamData_defense,teamData_offense = getTeamData()
playerData = getPlayerData(myLeague,teamData_defense,teamData_offense)

In [3]:
def get_active_roster(Roster):

    myRoster = pd.DataFrame(columns=['Position','Name','Proj Points'])

    # Create a dataframe of the full roster of players I own
    for player in Roster:
        playerDict = {}
        playerDict['Position'] = player.position
        playerDict['Name'] = player.name
        playerDict['Proj Points'] = player.stats[myLeague.nfl_week]['projected_points']
        myRoster = myRoster.append(playerDict, ignore_index = True)

    # Concatenate the n highest projected-points players with n being the allowed slots per position accross positions
    myRoster = pd.concat([myRoster[myRoster['Position'] == 'QB'].nlargest(1, ['Proj Points']),\
        myRoster[myRoster['Position'] == 'RB'].nlargest(2, ['Proj Points']),\
        myRoster[myRoster['Position'] == 'WR'].nlargest(3, ['Proj Points']),\
        myRoster[myRoster['Position'] == 'TE'].nlargest(1, ['Proj Points']),\
        myRoster[myRoster['Position'] == 'D/ST'].nlargest(1, ['Proj Points']),\
        myRoster[myRoster['Position'] == 'K'].nlargest(1, ['Proj Points'])])

    return myRoster


In [4]:
myRoster = get_active_roster(myLeague.teams[8].roster)
myRoster = myRoster.reset_index(drop=True)
myRoster

Unnamed: 0,Position,Name,Proj Points
0,QB,Dak Prescott,22.05
1,RB,Saquon Barkley,14.4
2,RB,Chase Edmonds,14.32
3,WR,Amari Cooper,12.08
4,WR,Marvin Jones Jr.,11.71
5,WR,Antonio Brown,11.46
6,TE,Noah Fant,8.62
7,D/ST,Broncos D/ST,7.18
8,K,Mason Crosby,8.08


In [6]:
# Find sizes of each free agency pool
sizes = []

for position in ['QB','RB','WR','TE','D/ST','K']:
    sizes.append(playerData[position][playerData[position]['Free Agency'] == True].shape[0])

In [7]:
def get_action(epsilon,Q_val,sizes_Dict,position_Dict):
    p_epsilon = np.random.uniform(0,1)
    if p_epsilon < epsilon:
        position = np.random.randint(9)
        free_agent = np.random.randint(sizes_Dict[position_Dict[position]])
        return (position,free_agent)

    action = np.unravel_index(Q_val.argmax(), Q_val.shape)
    while action[1] > (sizes_Dict[position_Dict[action[0]]] -1):
        Q_val[action[0],action[1]] = -100
        action = np.unravel_index(Q_val.argmax(), Q_val.shape)



    return action


In [9]:
# Sets Q-value table and vistis table to calculate inctemental averages
Q_val = np.zeros([9,max(sizes)])
n_s = np.zeros([9,max(sizes)])
sizes_Dict = dict(zip(['QB','RB','WR','TE','D/ST','K'], sizes))
position_Dict = dict(zip(range(9),['QB','RB','RB','WR','WR','WR','TE','D/ST','K']))
for slot in range(9):
    for null in range(sizes_Dict[position_Dict[slot]],max(sizes)): Q_val[slot][null] = -100

In [10]:
# Hyperparms
NUM_EPISODES = 1000
EPSILON = 0.3

# Baseline project points to calculate reward
Baseline = myRoster['Proj Points'].sum()

# Loop through episodes
for episode in tqdm(range(NUM_EPISODES)):

    newRoster = myRoster.copy() # make a cope of the original active roster

    action = get_action(EPSILON,Q_val,sizes_Dict,position_Dict) # get action

    # Use the given action to substitute the new player into the roster 
    position = position_Dict[action[0]]
    newRoster['Name'][action[0]] = playerData[position][playerData[position]['Free Agency'] == True].iloc[action[1]]['Name']
    newRoster['Proj Points'][action[0]] = playerData[position][playerData[position]['Free Agency'] == True].iloc[action[1]]['Projected Points']

    # Calculate the reward as the new total minus the baseline
    reward = newRoster['Proj Points'].sum() - Baseline

    # Update vistis counter and Q-table
    n_s[action[0],action[1]] += 1
    Q_val[action[0],action[1]] = Q_val[action[0],action[1]] + 1/n_s[action[0],action[1]] * (reward- Q_val[action[0],action[1]])

100%|██████████| 1000/1000 [00:02<00:00, 391.15it/s]


In [11]:
top_picks = np.unravel_index(np.argpartition(Q_val.flatten(), -4)[-4:], Q_val.shape)

recommends = pd.concat([playerData[position_Dict[top_picks[0][3]]][playerData[position_Dict[top_picks[0][3]]]['Free Agency'] == True].iloc[top_picks[1][3]],\
    playerData[position_Dict[top_picks[0][2]]][playerData[position_Dict[top_picks[0][2]]]['Free Agency'] == True].iloc[top_picks[1][2]],\
    playerData[position_Dict[top_picks[0][1]]][playerData[position_Dict[top_picks[0][1]]]['Free Agency'] == True].iloc[top_picks[1][1]],\
    playerData[position_Dict[top_picks[0][0]]][playerData[position_Dict[top_picks[0][0]]]['Free Agency'] == True].iloc[top_picks[1][0]]],axis=1).T

recommends = recommends[['Name','Projected Points']]
pos = []
for i in range(4): pos.append(position_Dict[top_picks[0][0]])
recommends['Postion'] =  np.flip(pos)
recommends['Q-Value'] = np.flip(Q_val[top_picks[0],top_picks[1]])

In [17]:
recommends.sort_values(by=['Q-Value'], ascending=False)

Unnamed: 0,Name,Projected Points,Postion,Q-Value
12,Harrison Butker,8.71,K,0.63
17,Greg Joseph,8.06,K,-0.02
23,Falcons D/ST,7.13,K,-0.05
16,Rodrigo Blankenship,7.7,K,-0.38
