In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import collections
import time
import datetime

from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

pd.options.mode.chained_assignment = None
pd.options.display.max_columns = 999

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Dumb "Best Pick" Drafting Agent
# When drafting, this agent will always pick the highest ranked player as long as it complies with the team roster requirements

# TODO: Punt Agent, Okay Pick Agent

class BestPickAgent:
  def __init__(self, id, variance):
    # Unique id to identify the agent. We start indexing at 1
    self.id = id+1
    self.type = "BestPick"
    self.variance = variance
    
    # List of player names on this agents team 
    self.team = []
    
    # The total stats of this agents team. Calculated as the sum of the z-scores of each players stats
    self.stats = {'FG%_z':0,'3P_z':0,'FT%_z':0,'TRB_z':0,'AST_z':0,'STL_z':0,'BLK_z':0,'TOV_z':0,'PTS_z':0, 'Overall_z':0}
    
    # The dumb agent is not allowed to draft more than 6 guards, 6 forwards, and 4 centers.
    # We give each position a number representation where: PG = 1, SG = 2, SF = 3, PF = 4, C = 5
    self.roster = {1:0, 2:0, 3:0, 4:0, 5:0}
  
  def draft(self, pool, names):
    pool.loc[pool['Overall_z'].idxmax()]
    # We set a restriction if the agent has too many players of a certain position
    # Also restricted to only choose from available players
    res = "TEAM == 0"
    if self.roster[1] + self.roster[2] >= 6:
      res += " and POS != 1 and POS != 2"
    if self.roster[3] + self.roster[4] >= 6:
      res += " and POS != 3 and POS != 4"
    if self.roster[5] >= 4:
      res += " and POS != 5"
    
    # Randomly choose a player from the top x available choices. Where x is the variance of the agent
    choice = random.randrange(self.variance)
    # Get the index of the chosen player
    ind = pool.query(res).nlargest(choice+1,'Overall_z').iloc[[choice]].index.item()
    
    # Set 'team' of the chosen player to the agents id
    pool.loc[ind, 'TEAM'] = self.id
    
    # Transform the player dataframe to a dictionary and remove the unnecessary keys
    newPlayer = pool.loc[ind].to_dict()
    self.team.append(newPlayer.pop('ID')) # Add player id to the agents team
    newPlayer.pop('TEAM')
    self.roster[newPlayer.pop('POS')] += 1 # Add the players position to the agents roster
    
    # Add the stats of the new player to the current teams stats
    for key in self.stats.keys():
        self.stats[key] += newPlayer[key]

    return pool, ind
  
  def reset(self):
    self.team = []
    self.stats = {'FG%_z':0,'3P_z':0,'FT%_z':0,'TRB_z':0,'AST_z':0,'STL_z':0,'BLK_z':0,'TOV_z':0,'PTS_z':0, 'Overall_z':0}
    self.roster = {1:0, 2:0, 3:0, 4:0, 5:0}

In [3]:
# Function that calculates the reward by simulating matchups against other agents
def getReward(agents):
    total = 0
    text = ""
    cats = ['FG%_z','3P_z','FT%_z','TRB_z','AST_z','STL_z','BLK_z','TOV_z','PTS_z']
    # Get the DQN agents stats
    for agent in agents:
        if agent.type == "DQN":
            dqnStats = agent.stats
            rnd = len(agent.team)
            text += "DQN "
            for cat in cats:
                text += " {}:{:.2f}".format(cat, dqnStats[cat])
    
    for agent in agents:
        if agent.type != "DQN":
            won = 0
            lost = 0
            tie = 0
            text += "\nvs {}".format(agent.id)
            for cat in cats:
                text += " {}:{:.2f}".format(cat, dqnStats[cat] - agent.stats[cat])
                if dqnStats[cat] > agent.stats[cat]:
                    won += 1
                elif dqnStats[cat] <= agent.stats[cat]:
                    lost += 1
                #else:
                    #tie += 1
                    
            if won >= 5:
                # Big incentive for winning + bonus for each cat won
                # Rewards increase in later rounds
                #discount = rnd / 10
                discount = 1
                total += (10 + (3.33 * (won-5)))*discount
            elif lost >= 5:
                # Negative reward for every cat lost
                #discount = rnd / 10
                discount = 1
                total -= (10 + (3.33 * (lost-5)))*discount

            #text += (" Won: {} Lost: {} Tie:{}\n".format(won, lost, tie))
            text += (" Won: {} Lost: {}".format(won, lost))

    return total, text

In [4]:
# Function to create a random pool of players
def createPool(size, draftOrder):
    # We randomly generate the z-scores of each stat
    pool = pd.DataFrame(columns=['ID', 'POS', 'FG%_z', 'FT%_z', '3P_z', 'TRB_z', 'AST_z', 'STL_z', 'BLK_z', 'TOV_z', 'PTS_z', 'TEAM'])

    '''
    tmp = []
    for agent in draftOrder:
    tmp.append(agent.id)
    tmp = np.array(tmp)
    '''
  
    for i in range(size):
        # We give each position a number representation where: PG = 1, SG = 2, SF = 3, PF = 4, C = 5
        rand = np.random.normal(scale=2, size=9)
        pool.loc[i] = [i, random.randint(1,5),rand[0],rand[1],rand[2],rand[3],rand[4],rand[5],rand[6],rand[7],rand[8],0]

    # Create position archetypes (Centers get more rebs and blks etc.)

    # Add an overall_z column which is just the sum of all the z-scores. This is treated as the players overall ranking.  
    pool['Overall_z'] = (pool['FG%_z']+pool['3P_z']+pool['FT%_z']+pool['TRB_z']+pool['AST_z']+pool['STL_z']+pool['BLK_z']+pool['TOV_z']+pool['PTS_z'])

    '''
    for x in range(20):
    pos = "pos_" + str(x)
    if x >= len(tmp):
      pool[pos] = 0
    else:
      pool[pos] = tmp[x]
    '''
    return pool

In [5]:
def getNBAPool():
    # Download the Avg stats of each player from the 2017-2018 season, from my gdrive
    df = pd.read_csv('Avg_Player_Stats_18_19.csv')

    # Drop unimportant columns
    df.drop(columns=['Rk', 'Age', 'Tm', 'GS', 'MP', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'ORB', 'DRB', 'PF'], inplace=True)

    # Remove players with 20 or less games played
    df.query("G>20", inplace=True)

    # Calculate the z-score of each players stats
    # We first calculate for the all of the players. Then we keep the top 250 players, then re-calculate z-score once more

    cols = ['FG%', '3P', 'FT%', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PTS']

    for x in range(2):
        for col in cols:
            col_zscore = col + '_z'
            if col == 'TOV':
              # More turnovers are bad, so we need to interchange some values for this stat
              df[col_zscore] = (df[col].mean() - df[col])/df[col].std(ddof=0)
            elif col == 'FG%':
              # We can't just get the z-score for FG% because we also need to account for how many FG attempts this player makes
              # Instead we get the z-scores for a players impact where:
              # impact = difference * attempts
              # difference = FG% - avgLeagueFG%

              diff = df['FG%'] - (df['FG'].sum()/df['FGA'].sum())
              df['FG_IMP'] = diff * df['FGA']
              df[col_zscore] = (df['FG_IMP'] - df['FG_IMP'].mean())/df['FG_IMP'].std(ddof=0)

            elif col == 'FT%':
              # For FT%, we use the same logic as FG%
              diff = df['FT%'] - (df['FT'].sum()/df['FTA'].sum())
              df['FT_IMP'] = diff * df['FTA'] 
              df[col_zscore] = (df['FT_IMP'] - df['FT_IMP'].mean())/df['FT_IMP'].std(ddof=0)

            else:
              # Default z-score equation
              # z-score = (actual.stat-mean)/std.dev
              df[col_zscore] = (df[col] - df[col].mean())/df[col].std(ddof=0)

        # Add an overall_z column which is just the sum of all the z-scores. This is treated as the players overall ranking.  
        df['Overall_z'] = (df['FG%_z']+df['3P_z']+df['FT%_z']+df['TRB_z']+df['AST_z']+df['STL_z']+df['BLK_z']+df['TOV_z']+df['PTS_z'])

        # Change the POS field to integers
        df.loc[df['POS'] == 'PG', 'POS'] = 1
        df.loc[df['POS'] == 'SG', 'POS'] = 2  
        df.loc[df['POS'] == 'SF', 'POS'] = 3  
        df.loc[df['POS'] == 'PF', 'POS'] = 4  
        df.loc[df['POS'] == 'C', 'POS'] = 5
        
        

        # Add a column to show which team the player belongs to. 0 means the player has not been drafted
        df['TEAM'] = 0

        # We then only keep the top 250 players, and then re-calculate the z-scores one more time
        df = df.nlargest(280, 'Overall_z')


        
    df.drop(df.columns.difference(['ID', 'POS', 'FG%_z', 'FT%_z', '3P_z', 'TRB_z', 'AST_z', 'STL_z', 'BLK_z', 'TOV_z', 'PTS_z', 'Overall_z', 'TEAM']), 1, inplace=True)
    # Change the ID column to integers
    playerNames = df['ID'].tolist()
    df['ID'] = range(280)
    
    df = df.reset_index(drop=True)
    return df, playerNames

In [6]:
# Helper function that creates a 1d state using the pool and the agents
# (280*13) + (12) + (12*11)
# (pool*playerInfo) + (numTeams*cats)
def createState(df, agents):
    front = df.values.flatten()
    
    mid = []
    for agent in agents:
        mid.append(agent.id)
        
    back = [[] for _ in range(12)]
    for agent in agents:
        back[agent.id-1].append(agent.id)
        for cat in agent.stats.keys():
            back[agent.id-1].append(agent.stats[cat])
    

    return np.concatenate((front, np.array(mid).flatten(), np.array(back).flatten()))

# Helper function to unpack the state so the DQN agent can use it
def unpackState(state):
    tmp = state[:3640]
    tmp = tmp.reshape(280,13)
    df = pd.DataFrame(data=tmp)
    df.columns = ['ID', 'POS', 'FG%_z', '3P_z', 'FT%_z', 'TRB_z', 'AST_z', 'STL_z',
       'BLK_z', 'TOV_z', 'PTS_z', 'Overall_z', 'TEAM']
    return df

In [7]:
# Fantasy Drafting Environment
# Modeled after the Gym environments: https://www.novatec-gmbh.de/en/blog/creating-a-gym-environment/
class DraftEnvironment:
    def __init__(self, poolSize, DQNAgent, teamCount):
        self.teamCount = teamCount
        self.poolSize = poolSize
        self.draftNum = 0
        # Initialize the other "dumb draft agents" and the DQN agent placeholder
        self.agents = []
        for x in range(self.teamCount-1):
            tmpAgent = MyDQNAgent(poolSize, x+1)
            tmpAgent.load()
            self.agents.append(tmpAgent)
        # Initialize the player pool
        #self.pool = pd.DataFrame(columns=['ID', 'POS', 'FG%_z', 'FT%_z', '3P_z', 'TRB_z', 'AST_z', 'STL_z', 'BLK_z', 'TOV_z', 'PTS_z', 'TEAM'])
        #self.pool = createPool(self.poolSize, self.agents)
        self.pool, self.playerNames = getNBAPool() #TODO: Switch back to randomized pools
        # The state of the pool before the current action
        self.prevPool = pd.DataFrame(columns=['ID', 'POS', 'FG%_z', 'FT%_z', '3P_z', 'TRB_z', 'AST_z', 'STL_z', 'BLK_z', 'TOV_z', 'PTS_z', 'TEAM'])
        # Used for rendering how rewards is calculated
        self.rewardText = ""
        # Used for rendering the draft picks
        self.draftHistory = []

    def step(self, action):
        # Action is the index of the player being drafted

        # Mark the DQN's player as drafted
        self.prevPool = self.pool
        self.pool.loc[action, 'TEAM'] = 1
        self.draftNum += 1
        self.draftHistory.append((1, action))

        # Dumb agents after the DQN agent drafts
        while True: 
            if self.draftNum >= self.teamCount:
                # Draft is done when everyone has 13 players
                if len(self.agents[self.draftNum-1].team) >= 13:
                    #return np.array([[self.pool.values, self.prevPool.values]]), getReward(self.agents), 1, 0
                    reward, self.rewardText = getReward(self.agents)
                    return createState(self.pool, self.agents), reward, 1, 0

                # Simulate snake draft  
                self.agents.reverse()
                self.draftNum = 0
                # Calculate the reward
                reward, self.rewardText = getReward(self.agents)

            if self.agents[self.draftNum].type != "DQN" and len(self.agents[self.draftNum].team) < 13:
                self.prevPool = self.pool
                self.pool, ind = self.agents[self.draftNum].draft(self.pool, self.playerNames)
                self.draftHistory.append((self.agents[self.draftNum].id, ind))
                self.draftNum += 1
            else:
                break

        # Returns next_state, reward, done, info
        return createState(self.pool, self.agents), reward, 0, 0

    def reset(self):
        # Shuffle draft order
        random.shuffle(self.agents)
        self.draftNum = 0
        self.draftHistory = []
        # Randomize the player pool
        #self.pool = createPool(self.poolSize, self.agents)
        self.pool['TEAM'] = 0
        # Clear all teams and return a clean "state"
        for agent in self.agents:
            agent.reset()   
        # Draft players for all dumb agents in earlier draft position than DQN agent
        while True:
            ind = 0
            if self.agents[self.draftNum].type != "DQN":
                self.pool, ind = self.agents[self.draftNum].draft(self.pool, self.playerNames)
                self.draftHistory.append((self.agents[self.draftNum].id, ind))
                self.draftNum += 1
            else:
                break

        return createState(self.pool, self.agents)

    def render(self):
        print(self.rewardText)
        
        '''
        cnt = 0
        for x in range(13):         
            print("Round {}".format(x+1))
            for y in range(self.teamCount):
                print("Agent {} drafts {}".format(self.draftHistory[cnt+y][0], self.playerNames[self.draftHistory[cnt+y][1]]))
            print("")
            cnt += self.teamCount
        '''
        return

In [8]:
# Deep Q-learning Agent
# Taken from https://keon.io/deep-q-learning/ with some adjustments
class MyDQNAgent:
    def __init__(self, pool_size, id):
        self.type = "DQN"
        self.id = id
        self.team = []   
        self.stats = {'FG%_z':0,'3P_z':0,'FT%_z':0,'TRB_z':0,'AST_z':0,'STL_z':0,'BLK_z':0,'TOV_z':0,'PTS_z':0, 'Overall_z':0}   
        self.roster = {1:0, 2:0, 3:0, 4:0, 5:0}
        # State size = current and prev frame * # of players in the pool * # of variables per player
        #self.state_size = 2 * pool_size * 13
        # State size = # of players in the pool * # of variables per player
        self.state_size = pool_size * 13 + 12 + 12 * 11
        self.action_size = pool_size
        self.memory = collections.deque(maxlen=8000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 0.01  # exploration rate
        self.epsilon_min = 0.01 ###
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
        
    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        
        # Is 256 nodes too much?
        model.add(Dense(128, input_shape=(self.state_size,), activation='relu'))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))

        return model
      
    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
        
    def act(self, state):
        # Agent is only able to choose from undrafted players
        # TODO: The agent is also not allowed to draft too many players of a certain position (How to implement this in replay()?)
        
        res = "TEAM == 0"
        '''
        if self.roster[1] + self.roster[2] >= 6:
          res += " and POS != 1 and POS != 2"
        if self.roster[3] + self.roster[4] >= 6:
          res += " and POS != 3 and POS != 4"
        if self.roster[5] >= 4:
          res += " and POS != 5"
        '''
        unp = unpackState(state)
        
        if np.random.rand() <= self.epsilon:
            # Get the index of the randomly chosen undrafted player
            # Instead of getting a random player, we choose randomly from the top 15 players
            # TODO: Try fully random instead of top 15
            choice = random.randrange(15)
            ind = unp.query(res).nlargest(choice+1,'Overall_z').iloc[[choice]].index.item()

        else:
            df = unp.copy()
            # act_values is the predicted reward if an action is taken
            act_values = self.model.predict(np.array([state]))
            df["act_values"] = act_values[0]         
          
            # Get the index of the undrafted player with the largest 'act_value'
            ind = df.query(res).nlargest(1, 'act_values').index.item()

        # Transform the player dataframe to a dictionary and remove the unnecessary keys
        newPlayer = unp.loc[ind].to_dict()
        self.team.append(newPlayer.pop('ID')) # Add player id to the agents team
        newPlayer.pop('TEAM')
        self.roster[newPlayer.pop('POS')] += 1 # Add the players position to the agents roster

        # Add the stats of the new player to the current teams stats
        for key in self.stats.keys():
            self.stats[key] += newPlayer[key]  
          
        return ind
      
    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                res = "TEAM == 0"
                next_df = unpackState(next_state)
                act_values = self.model.predict(np.array([next_state]))
                next_df["act_values"] = act_values[0]
                target = reward + (self.gamma * next_df.query(res)['act_values'].max())              

            target_f = self.model.predict(np.array([state]))
            #print("{} -> {} + {}".format(target_f[0][action], reward,self.gamma * next_df.query(res)['act_values'].max()))
            target_f[0][action] = target

            self.model.fit(np.array([state]), target_f, epochs=1, verbose=0)
            
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
    
    def reset(self):
        self.type = "DQN"
        self.id = 1
        self.team = []   
        self.stats = {'FG%_z':0,'3P_z':0,'FT%_z':0,'TRB_z':0,'AST_z':0,'STL_z':0,'BLK_z':0,'TOV_z':0,'PTS_z':0, 'Overall_z':0}   
        self.roster = {1:0, 2:0, 3:0, 4:0, 5:0}
    
    def save(self):
        # Save the model weights
        self.model.save_weights('FantasyDraft-DQN.h5')
    
    def load(self):
        self.model.load_weights('FantasyDraft-DQN.h5')
        print("Succesfully loaded weights!")
      

In [9]:
# TODO: Multiagent Training
# TODO: Reward function should be much bigger for the final round

# Looks like the agent is learning to punt 3P, FT%, and TO
# TODO: Try running the agent with the 2017-2018 stats.

# Training the DQN agent

# Initialize the environment
poolSize = 280
numRounds = 13
numTeams = 12
env = DraftEnvironment(poolSize, _, numTeams)
numEpisodes = 1
done = 0

metricsEvery = 1
renderEvery = 1
roundScore = 0
roundOverall = 0
avgScore = []
avgOverall = []

fo = open("Logs-FantasyDraft-DQN.txt", "w")

timeStart = time.time()
# Iterate the draft
for e in range(numEpisodes):
  
    # Reset environment
    state = env.reset()
  
    # Until the drafting process is done
    for r in range(numRounds):
    
        for agent in env.agents:
            # DQN Agent decides action
            action = agent.act(state)
            # Move to the next state given the DQN Agents actiono
            next_state, reward, done, _ = env.step(action)
            # Save the experience to memory
            agent.remember(state, action, reward, next_state, done)
            # Update current state
            state = next_state

            #roundScore += reward
    
    # Print the agents score
    avgScore.append(roundScore)
    roundScore = 0
    avgOverall.append(agent.stats['Overall_z'])
        
    if e % metricsEvery == 0:
        text = "episode: {}/{}, score: {}, overall_Z: {}, time: {}".format(e, numEpisodes, sum(avgScore)/len(avgScore), sum(avgOverall)/len(avgOverall), datetime.timedelta(seconds=time.time() - timeStart))
        print(text)
        fo.write(text+'\n')
        avgScore = []
        avgOverall = []
        agent.save()

    if e % renderEvery == 0 and e > 0:
        env.render()
            
    if len(agent.memory) > 128*10:
        # Train the agent using its experiences
        agent.replay(128)
        
fo.close()

W0828 03:19:20.618001 139758875342592 deprecation_wrapper.py:119] From /home/rafaelvcantero/.local/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0828 03:19:20.634458 139758875342592 deprecation_wrapper.py:119] From /home/rafaelvcantero/.local/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0828 03:19:20.637503 139758875342592 deprecation_wrapper.py:119] From /home/rafaelvcantero/.local/lib/python3.5/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0828 03:19:20.692251 139758875342592 deprecation_wrapper.py:119] From /home/rafaelvcantero/.local/lib/python3.5/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimize

Succesfully loaded weights!
Succesfully loaded weights!
Succesfully loaded weights!
Succesfully loaded weights!
Succesfully loaded weights!
Succesfully loaded weights!
Succesfully loaded weights!
Succesfully loaded weights!
Succesfully loaded weights!
Succesfully loaded weights!
Succesfully loaded weights!


  result = method(y)


ValueError: Error when checking input: expected dense_21_input to have shape (3784,) but got array with shape (3663,)

In [None]:
for agent in env.agents:
    print(agent.id)
    print(len(agent.team))
    print(agent.stats)
    print("---")

In [None]:
x = env.pool.values.flatten()

In [None]:
poolSize = 280
numRounds = 13
numTeams = 12
agent = MyDQNAgent(poolSize, 1)
agent.load()
env = DraftEnvironment(poolSize, agent, numTeams)
# Reset environment
state = env.reset()

In [None]:
# DQN Agent decides action
action = agent.act(state)
# Move to the next state given the DQN Agents action
next_state, reward, done, _ = env.step(action)
agent.remember(state, action, reward, next_state, done)
# Update current state
state = next_state

env.render()

In [None]:
df['act_values'].max()
reward

In [None]:
x = 0
print(env.agents[x].team)
print(env.agents[x].stats)

In [None]:
x+=1
print(env.agents[x].team)
print(env.agents[x].stats)

In [None]:
!jupyter nbconvert --to script FantasyDraft-DQN.ipynb