Generates a lineup using a genetic algorithm, using the known scores of the players (oracle solution)
Could also be extended to create lineups based of any heuristic




# Installing/Importing Necessary Libraries

In [1]:
import numpy as np
import pandas as pd

import random

In [2]:
# stop the pandas indexing/splicing warning from appearing
import warnings
warnings.filterwarnings('ignore')

In [3]:
# for oracle

param = 'FPTS'

df = pd.read_csv('/content/dataset2_splitpositions_minutesfixed.csv')
df = df[['Date', 'Name', 'Position', 'Salary', param]]
# df = df[df.FPTS >= 0] # drop all players who scored less than 0 points
df.dropna(subset=[param], inplace=True)
df.shape

(155418, 5)

In [None]:
# for salary heuristic

param = 'Prediction'

df = pd.read_csv('/content/2020-21_predictionsUsingSalary.csv')
df = df[['Date', 'Name', 'Position', 'Salary', 'FPTS', param]]
# df = df[df.FPTS >= 0] # drop all players who scored less than 0 points
df.dropna(subset=[param], inplace=True)
df.shape

(76697, 6)

In [None]:
# for rolling avg heuristic

param = 'Pred1'

df = pd.read_csv('/content/2020-21_seasonAvgSplitPositions.csv')
df = df[['Date', 'Name', 'Team', 'Position', 'Salary', 'FPTS', param]]
# df = df[df.FPTS >= 0] # drop all players who scored less than 0 points
df.dropna(subset=[param], inplace=True)
df.shape

(75151, 7)

In [None]:
# for ridge reg heuristic

param = 'Pred2'

df = pd.read_csv('/content/2020-21_ridgeRegPredictions.csv')
df = df[['Date', 'Name', 'Team', 'Position', 'Salary', 'FPTS', param]]
# df = df[df.FPTS >= 0] # drop all players who scored less than 0 points
df.dropna(subset=[param], inplace=True)
df.shape

(75151, 7)

In [None]:
# for season average and ridge regression

param = 'RidgeRegPred'
df = pd.read_csv('/content/2020-21_SPLIT_seasonAvgAndRidgeReg.csv')
df = df[['Date', 'Name', 'Position', 'Salary', 'FPTS', 'SeasonAvgPred', 'RidgeRegPred']]
# df.sort_values(['Date','Salary', 'RidgeRegPred'], ascending=[True, True, False], inplace=True)
df.reset_index(inplace=True, drop=True)
df.shape

(75151, 7)

In [4]:
# all the dates that the brute force was able to run on
dates = df['Date'].unique()
len(dates)

356

# Helper Functions

In [5]:
# returns a list of dataframes for each position
def buildPositionDFs(df1, order, sort=['Salary', 'FPTS'], incr=[True, False]):
  # incr determines if the players are each position are sorted by increasing salary
  l1 = []
  for i in order:
    # print(i)
    posDF = df1[df1['Position'] == i]
    posDF.sort_values(sort, ascending=incr, inplace=True)
    posDF.reset_index(inplace=True, drop=True)
    l1.append(posDF)
  return l1

In [6]:
# checkDuplicates takes in a dict of players, checks for duplicates
def checkDuplicates(players):
  setOfPlayers = set()
  for player in players.values():
      if player in setOfPlayers:
          return True
      else:
          setOfPlayers.add(player)         
  return False

In [7]:
# checkSalary takes in a dict of players, checks if salary > 50000
# if yes return true, else return false
def checkSalary(players):
  total_salary = getSalary(players)
  if total_salary > 50000:
    return True
  return False

# getSalary takes in a dict of players, returns the combined salary of all players
def getSalary(players):
  total_salary = 0
  for player in players.values():
    player_sal = currentPlayers[currentPlayers['Name'] == player].iloc[0].Salary
    total_salary += player_sal
  return total_salary

In [84]:
getSalary(lineup1[0])

40000

In [8]:
def getSingleScore(player, df2, param):
  return df2[df2['Name'] == player].iloc[0][param]

# getScore takes in a dict of players, returns the fantasy points scored by players
def getScore(players, df_list, param):
  total_score = 0
  for pos in players:
    i = pos_idxs[pos]
    player_score = getSingleScore(players[pos], df_list[i], param)
    # print(player, player_score)
    total_score += player_score
  return total_score

In [44]:
def getPlayer(df1, date, player):
  return df1[(df1['Date'] == date) & (df1['Name'] == player)]

# test

naive avg: 176

bruteforce avg: 286.54

previous greedy selection avg: 262.54

current greedy selection avg: 206.35

genetic alg lineup selection avg: 360.22

In [10]:
# variables for the genetic algorithm
mu = 25
lamb = 25
generations = 50
mutation_rate = 0.25 # probabilty that a new element will undergo mutation
mutation_attempts = 10

crossover_rate = 0.5 # probabilty that a new element will undergo crossover
crossover_attempts = 10

In [11]:
def initialize_population(l1, mu, lamb, param):
  population = []
  while len(population) < mu+lamb:
    
    lineup = {}
    for i in range(len(l1)):
      player = l1[i].sample()

      lineup[positions[i]] = player['Name'].iloc[0]
      # print(player)
    
    if not checkDuplicates(lineup) and not checkSalary(lineup):
    # if not checkDuplicates(lineup):
      predScore = getScore(lineup, l1, param)
      FPTS = getScore(lineup, l1, 'FPTS')
      salary = getSalary(lineup)
      population.append([lineup, salary, predScore, FPTS])
    
  return population

In [12]:
def print_population(population):
  for i in range(len(population)):
    print(population[i][0])
    print(f'Salary: {population[i][1]},\tPredicted: {population[i][2]},\tFPTS: {population[i][3]}')

In [13]:
def mutate(lineup,mutation_rate,l1, param):
  old_lineup = lineup.copy()
  done = False
  num_tries = 0

  while not done and num_tries < mutation_attempts:
    # for each position in your lineup, replace current player with random player with probability mutation_rate
    # if doesn't violate any constraints we're done, otherwise reset the chromosome to its starting condition and try again
    players = old_lineup[0].copy()
    count = 0 # to make sure a mutation gets made
    
    # print(f'BEFORE: {players}')
    for pos in players:
      if random.random() < mutation_rate:
        count += 1
        # print(pos)
        idx = pos_idxs[pos]
        # print(idx)

        # num_tries += 1
        player = l1[idx].sample()
        player_name = player['Name'].iloc[0]

        # print(f'Swapping {pos}: {players[pos]} for {player_name}')

        players[pos] = player_name
    # print(f'AFTER: {players}')

    if not checkDuplicates(players) and not checkSalary(players) and count > 0:
      # print(f'Valid Lineup')
      lineup[0] = players
      lineup[1] = getSalary(players)
      lineup[2] = getScore(players, l1, param)
      lineup[3] = getScore(players, l1, 'FPTS')
      done = True
    num_tries += 1
  return lineup

In [14]:
# def crossover(lineup1,lineup2, crossover_rate):
def crossover(lineup1, lineup2, l1, param):
  done = False
  numTries = 0

  child = {}

  while not done and numTries<crossover_attempts:
  #for each position, pick the current player from either parent at random
  #if doesn't violate any constraints we're done, try again
    for pos in lineup1[0]:
      if random.random() < 0.5:
        child[pos] = lineup1[0][pos]
      else:
        child[pos] = lineup2[0][pos]
    
    salary = np.inf
    predScore = 0
    FPTS = 0
    if not checkDuplicates(child) and not checkSalary(child):
      # print('Valid lineup')
      salary = getSalary(child)
      predScore = getScore(child, l1, param)
      FPTS = getScore(child, l1, 'FPTS')
      done = True
      # print(child, salary, score)
    numTries += 1
  if salary == np.inf or predScore == 0 or FPTS == 0:
    print(child)
  return [child, salary, predScore, FPTS]
  


In [90]:
p1 = population[6]
p1

[{'PG': 'Shai Gilgeous-Alexander',
  'SG': 'Evan Fournier',
  'SF': 'Terrence Ross',
  'PF': 'Joe Ingles',
  'C': 'Richaun Holmes',
  'G': 'Dillon Brooks',
  'F': 'Lauri Markkanen',
  'UTIL': 'Seth Curry'},
 40600,
 198.75,
 198.75]

In [92]:
p2 = population[10]
p2

[{'PG': 'Ben Simmons',
  'SG': 'Frank Ntilikina',
  'SF': 'Doug McDermott',
  'PF': 'Rui Hachimura',
  'C': 'Joel Embiid',
  'G': 'Allonzo Trier',
  'F': 'Isaac Bonga',
  'UTIL': 'Luke Kennard'},
 39900,
 186.75,
 186.75]

In [94]:
c = crossover(p1, p2, l1, param)
c

[{'PG': 'Shai Gilgeous-Alexander',
  'SG': 'Evan Fournier',
  'SF': 'Doug McDermott',
  'PF': 'Rui Hachimura',
  'C': 'Richaun Holmes',
  'G': 'Allonzo Trier',
  'F': 'Lauri Markkanen',
  'UTIL': 'Luke Kennard'},
 37700,
 210.5,
 210.5]

In [15]:
mu, lamb, generations

(25, 25, 50)

In [36]:
positions = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'UTIL']
pos_idxs = {'PG':0, 'SG':1, 'SF':2, 'PF':3, 'C':4, 'G':5, 'F':6, 'UTIL':7 }
data = []


In [19]:
# for x in range(len(dates)):
for x in range(1,2):
  date = dates[x]
  print(x, date)

  # continue

  currentPlayers = df[df['Date']==date]

  l1 = buildPositionDFs(currentPlayers, positions)

  # initialize a population of size mu+lamb, consists of randomly generated valid lineups
  population = initialize_population(l1, mu, lamb, param)
  # print_population(population)

  break

  for generation in range(generations):
    if (generation%10 == 0):
      print(f'Generation {generation}')
      # print_population(population)
    # sort the population in descending order by param
    population = sorted(population, key=lambda x: x[2], reverse=True)

    # print_population(population)

    elites = population[0:mu] #the best mu are unchanged
    # print(len(elites))
    # print_population(elites)

    new_lineups = []
    for i in range(lamb):
      # create a copy of a random elite
      new_lineup = random.choice(elites).copy()
      # print(new_lineup)

      # alternatively, use each elite once
      # new_lineup = elites[i].copy()

      new_lineup = mutate(new_lineup, mutation_rate, l1, param)

      if random.random() < crossover_rate:
        # print('CROSSOVER')
        parent2 = random.choice(elites)
        new_lineup = crossover(new_lineup, parent2, l1, param)
      new_lineups.append(new_lineup)

    population = elites+new_lineups

  # print('Elites:')
  # print_population(elites)
  # print('Mutations:')
  # print_population(new_lineups)
  # print('Population:')
  # print_population(population)

  # resorting the population again
  # even though typically the best solutions are already at the front of the list
  population = sorted(population, key=lambda x: x[2], reverse=True)
  soln = population[0][0]

  soln['Date'] = date
  soln['Salary'] = population[0][1]
  soln['Prediction'] = population[0][2]
  soln['FPTS'] = population[0][3]
  print(soln)
  data.append(soln)

1 20191023
{'PG': 'Isaac Bonga', 'SG': 'Jordan McRae', 'SF': 'Matisse Thybulle', 'PF': 'Mason Plumlee', 'C': 'Caleb Swanigan', 'G': 'Ben Simmons', 'F': 'Wenyen Gabriel', 'UTIL': 'Treveon Graham'}
Salary: 30900,	Predicted: 151.5,	FPTS: 151.5
{'PG': 'Allonzo Trier', 'SG': 'Nicolas Batum', 'SF': 'Harrison Barnes', 'PF': 'Cedi Osman', 'C': 'Markieff Morris', 'G': 'Bryn Forbes', 'F': 'Justin Jackson', 'UTIL': 'Bogdan Bogdanovic'}
Salary: 34000,	Predicted: 124.0,	FPTS: 124.0
{'PG': 'Aaron Holiday', 'SG': 'Yogi Ferrell', 'SF': 'Mario Hezonja', 'PF': 'Richaun Holmes', 'C': 'Cody Zeller', 'G': 'Shabazz Napier', 'F': 'Davis Bertans', 'UTIL': 'Bryn Forbes'}
Salary: 28700,	Predicted: 118.75,	FPTS: 118.75
{'PG': 'Patty Mills', 'SG': 'Coby White', 'SF': 'Matisse Thybulle', 'PF': 'Kelly Olynyk', 'C': 'Ed Davis', 'G': 'Torrey Craig', 'F': 'Dillon Brooks', 'UTIL': 'Bryn Forbes'}
Salary: 29600,	Predicted: 170.5,	FPTS: 170.5
{'PG': 'Darius Garland', 'SG': 'Derrick White', 'SF': 'Torrey Craig', 'PF': 'Dou

In [None]:
data = []

# for visualizing genetic algorithms
for i in range(len(population)):
  ind = population[i][0].copy()
  ind['Salary'] = population[i][1]
  ind['FPTS'] = population[i][3]
  # print(ind)
  data.append(ind)

pop = pd.DataFrame(data)
pop.to_csv(f'initialization.csv', line_terminator='\n', index=False)

In [37]:
population = sorted(population, key=lambda x: x[2], reverse=True)
population

[[{'PG': 'Allonzo Trier',
   'SG': 'Terrance Ferguson',
   'SF': 'Treveon Graham',
   'PF': 'Jayson Tatum',
   'C': 'Cody Zeller',
   'G': 'Evan Fournier',
   'F': 'Kelly Olynyk',
   'UTIL': 'Karl-Anthony Towns'},
  40500,
  229.25,
  229.25],
 [{'PG': 'Daryl Macon',
   'SG': 'Bradley Beal',
   'SF': 'Tobias Harris',
   'PF': 'Nemanja Bjelica',
   'C': 'Nikola Vucevic',
   'G': 'DJ Augustin',
   'F': 'Lauri Markkanen',
   'UTIL': 'Tim Frazier'},
  46900,
  229.0,
  229.0],
 [{'PG': 'Emmanuel Mudiay',
   'SG': 'Evan Fournier',
   'SF': 'Trevor Ariza',
   'PF': 'Moritz Wagner',
   'C': 'Karl-Anthony Towns',
   'G': 'Will Barton',
   'F': 'Miles Bridges',
   'UTIL': 'Seth Curry'},
  40400,
  223.75,
  223.75],
 [{'PG': 'Dennis Schroder',
   'SG': 'Will Barton',
   'SF': 'Jeremy Lamb',
   'PF': 'Doug McDermott',
   'C': 'Deandre Ayton',
   'G': 'Jeff Teague',
   'F': 'LaMarcus Aldridge',
   'UTIL': 'Dwayne Bacon'},
  43600,
  221.5,
  221.5],
 [{'PG': 'Monte Morris',
   'SG': 'Kendrick Nun

In [None]:
data

In [None]:
soln_df = pd.DataFrame(data)
soln_df

Unnamed: 0,PG,SG,SF,PF,C,G,F,UTIL,Date,Salary,Prediction,FPTS
0,Jeff Teague,Jaylen Brown,Khris Middleton,Brandon Ingram,Montrezl Harrell,Goran Dragic,Maxi Kleber,Bam Adebayo,20201225,49500,331.921177,252.75
1,Terry Rozier,Joe Ingles,Terrence Ross,Julius Randle,Domantas Sabonis,Dejounte Murray,Harrison Barnes,Keldon Johnson,20201226,49600,329.314797,271.00
2,Darius Garland,Alec Burks,Terrence Ross,DeMar DeRozan,Nikola Vucevic,Dejounte Murray,Domantas Sabonis,Dante Exum,20201227,49800,317.937673,243.25
3,George Hill,James Harden,Josh Jackson,Darius Bazley,Christian Wood,Ja Morant,Dillon Brooks,Talen Horton-Tucker,20201228,49700,339.276206,253.50
4,Monte Morris,Duncan Robinson,Brandon Ingram,Nicolas Batum,Andre Drummond,Luguentz Dort,Josh Jackson,Nikola Jokic,20201229,49700,305.518754,274.00
...,...,...,...,...,...,...,...,...,...,...,...,...
167,Bogdan Bogdanovic,Bryn Forbes,Pat Connaughton,Giannis Antetokounmpo,Brook Lopez,Trae Young,Bobby Portis,Clint Capela,20210627,49800,244.565422,221.00
168,Chris Paul,Devin Booker,Nicolas Batum,Marcus Morris,Deandre Ayton,Paul George,Dario Saric,DeMarcus Cousins,20210628,49700,234.972763,262.50
169,Jeff Teague,Bogdan Bogdanovic,Khris Middleton,John Collins,Clint Capela,Cam Reddish,Giannis Antetokounmpo,Bobby Portis,20210629,50000,244.947555,193.75
170,Cameron Payne,Devin Booker,Nicolas Batum,Marcus Morris,DeMarcus Cousins,Chris Paul,Mikal Bridges,Paul George,20210630,49300,231.938788,258.50


In [None]:
soln_df['Prediction'].mean()

260.2727041774657

In [None]:
soln_df.to_csv(f'{param}geneticAlg.csv', line_terminator='\n', index=False)


# Double Checking Genetic Algorithm Oracle Results

In [None]:
soln_df = pd.read_csv('geneticAlgOracleResults.csv')


In [None]:
soln_df


Unnamed: 0,PG,SG,SF,PF,C,G,F,UTIL,Date,Salary,FPTS
0,Dennis Schroder,Kentavious Caldwell-Pope,Thabo Sefolosha,Ersan Ilyasova,Andre Drummond,Stephen Curry,Marcus Morris,LeBron James,20151027,49800,322.50
1,Ricky Rubio,Bradley Beal,Danilo Gallinari,Greg Monroe,Jahlil Okafor,CJ McCollum,Kawhi Leonard,Jonas Valanciunas,20151028,49800,372.50
2,Jeff Teague,CJ Miles,Carmelo Anthony,Blake Griffin,Al Horford,Kyle Korver,Kyle OQuinn,John Jenkins,20151029,47900,287.00
3,Reggie Jackson,Victor Oladipo,TJ Warren,Anthony Tolliver,Karl-Anthony Towns,Russell Westbrook,Marcus Morris,Kevin Love,20151030,49600,379.25
4,Stephen Curry,Langston Galloway,Carmelo Anthony,Drew Gooden,Willie Cauley-Stein,Mike Conley,Rudy Gay,Brandon Knight,20151031,49600,355.00
...,...,...,...,...,...,...,...,...,...,...,...
196,Russell Westbrook,Klay Thompson,Andre Iguodala,Serge Ibaka,Festus Ezeli,Dion Waiters,Kevin Durant,Andre Roberson,20160524,49500,312.25
197,Kyrie Irving,DeMar DeRozan,LeBron James,Kevin Love,Timofey Mozgov,JR Smith,Tristan Thompson,Richard Jefferson,20160525,47900,233.25
198,Russell Westbrook,Andre Roberson,Kevin Durant,Serge Ibaka,Marreese Speights,Dion Waiters,Draymond Green,Andrew Bogut,20160526,49800,284.75
199,Kyle Lowry,Iman Shumpert,LeBron James,Kevin Love,Bismack Biyombo,JR Smith,James Johnson,Kyrie Irving,20160527,49900,262.25


In [None]:
dates=soln_df.Date.unique()
len(dates)

201

In [None]:
# double checking that the dataframe has correct FPTS and Salary info
# and only consists of valid lineups

for x in range(len(dates)):
  date = dates[x]
  print(date)
  currentPlayers = df2[df2['Date']==date]

  # get the list of players for the specific date
  l1 = buildPositionDFs(currentPlayers, positions)

  # get the soln for the specific date
  geneticSoln = soln_df[soln_df['Date']==date].squeeze()

  lineup = geneticSoln[['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'UTIL']].to_dict()

  # check each player is in the correct position
  for i in range(8):
    pos = positions[i]
    pos_df = l1[i]
    player = lineup[pos]

    # print(player, pos_df['Name'].to_list())
    if not pos_df['Name'].str.contains(player).any():
      raise ValueError(f'Date: {date}\tPlayer missing {player}')


  soln_sal = geneticSoln['Salary']
  soln_score = geneticSoln['FPTS']
  # print(lineup)

  salary = getSalary(lineup)
  score = getScore(lineup)

  # print(soln_sal, salary)
  # print(soln_score, score)

  if (salary > 50000):
    raise ValueError(f'Date: {date}\tSalary > 50000: {salary}')

  if (salary != soln_sal):
    raise ValueError(f'Date: {date}\tSalaries not matching: {soln_sal},{salary}')

  if (score != soln_score):
    raise ValueError(f'Date: {date}\tScores not matching: {soln_score},{score}')

20151027
20151028
20151029
20151030
20151031
20151101
20151102
20151103
20151104
20151105
20151106
20151107
20151108
20151109
20151110
20151111
20151112
20151113
20151114
20151115
20151116
20151117
20151118
20151119
20151120
20151121
20151122
20151123
20151124
20151125
20151127
20151128
20151129
20151130
20151201
20151202
20151203
20151204
20151205
20151206
20151207
20151208
20151209
20151210
20151211
20151212
20151213
20151214
20151215
20151216
20151217
20151218
20151219
20151220
20151221
20151222
20151223
20151225
20151226
20151227
20151228
20151229
20151230
20151231
20160101
20160102
20160103
20160104
20160105
20160106
20160107
20160108
20160109
20160110
20160111
20160112
20160113
20160114
20160115
20160116
20160117
20160118
20160119
20160120
20160121
20160122
20160123
20160124
20160125
20160126
20160127
20160128
20160129
20160130
20160131
20160201
20160202
20160203
20160204
20160205
20160206
20160207
20160208
20160209
20160210
20160211
20160218
20160219
20160220
20160221
20160222
2

# Thesis Defense Examples

Mutation Example

In [None]:
lineup1 = population[5].copy()
lineup1

In [None]:
mut_lineup1 = mutate(lineup1, mutation_rate, l1, param)
mut_lineup1

[{'PG': 'Coby White',
  'SG': 'Rodney Hood',
  'SF': 'Thaddeus Young',
  'PF': 'Meyers Leonard',
  'C': 'Andre Drummond',
  'G': 'Jeremy Lamb',
  'F': 'Davis Bertans',
  'UTIL': 'KZ Okpala'},
 36800,
 198.5,
 198.5]

In [None]:
for pos in mut_lineup1[0]:
  print(getPlayer(df, dates[1], mut_lineup1[0][pos]))

Crossover Example