Selects lineups using a greedy approach.

Picks the best available player and assigns them to their most specific open position,

provided that the rest of the roster can still be filled within the remaining budget.

# Importing Libraries and Dataset

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Stop the pandas indexing/slicing (chained-assignment) warning from appearing.
# NOTE(review): filterwarnings('ignore') is given no category or module, so it
# suppresses every warning for the rest of the session, not only the pandas one.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Oracle setup: load the split-position dataset and order it on the FPTS column.
# (An alternative ordering on ['Date', 'Name', 'FPTS'] was also tried.)

df = (
    pd.read_csv('/content/dataset_2020-21_splitpositions.csv')
    # by date, then cheapest salary first, ties broken by highest FPTS
    .sort_values(['Date', 'Salary', 'FPTS'], ascending=[True, True, False])
    # renumber rows 0..n-1 after sorting
    .reset_index(drop=True)
)

In [None]:
# Linear-regression-on-salary setup: load the predictions file and order it on
# the Prediction column.

df = (
    pd.read_csv('/content/2020-21_predictionsUsingSalary.csv')
    # by date, then cheapest salary first, ties broken by highest prediction
    .sort_values(['Date', 'Salary', 'Prediction'], ascending=[True, True, False])
    # renumber rows 0..n-1 after sorting
    .reset_index(drop=True)
)

In [None]:
# Season-average / ridge-regression setup: load the predictions file, keep only
# the columns used below, and order it on the RidgeRegPred column.
# To rank on the season-average model instead, sort on 'SeasonAvgPred' in place
# of 'RidgeRegPred'.

df = (
    pd.read_csv('/content/2020-21_SPLIT_seasonAvgAndRidgeReg.csv')
    .loc[:, ['Date', 'Name', 'Position', 'Salary', 'FPTS', 'SeasonAvgPred', 'RidgeRegPred']]
    # by date, then cheapest salary first, ties broken by highest prediction
    .sort_values(['Date', 'Salary', 'RidgeRegPred'], ascending=[True, True, False])
    # renumber rows 0..n-1 after sorting
    .reset_index(drop=True)
)



In [None]:
# Rank each roster slot from most specific (1) to least specific (8); the rank
# is stored in the 'Pos' column and used later as a sort tie-breaker.
sort_dict = {
    slot: rank
    for rank, slot in enumerate(('PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'UTIL'), start=1)
}
df['Pos'] = df['Position'].map(sort_dict)

# One entry per distinct value of the Date column, in order of first appearance.
dates = df['Date'].unique()

In [None]:
df

Unnamed: 0,Date,Name,Position,Salary,FPTS,SeasonAvgPred,RidgeRegPred,Pos
0,20201225,Precious Achiuwa,C,3000,19.25,11.750000,12.596637,5
1,20201225,Precious Achiuwa,F,3000,19.25,11.750000,12.596637,7
2,20201225,Precious Achiuwa,PF,3000,19.25,11.750000,12.596637,4
3,20201225,Precious Achiuwa,UTIL,3000,19.25,11.750000,12.596637,8
4,20201225,Nicolo Melli,F,3000,1.00,7.000000,8.991235,7
...,...,...,...,...,...,...,...,...
75146,20210701,Khris Middleton,F,9400,62.25,38.236301,37.913684,7
75147,20210701,Khris Middleton,G,9400,62.25,38.236301,37.913684,6
75148,20210701,Khris Middleton,SF,9400,62.25,38.236301,37.913684,3
75149,20210701,Khris Middleton,SG,9400,62.25,38.236301,37.913684,2


# Helper Functions

In [None]:
# returns a list of dataframes, one for each position in `order`
def buildPositionDFs(df1, order, sort=None, incr=None):
  """Split df1 into one sorted, re-indexed DataFrame per position.

  Args:
    df1: DataFrame with a 'Position' column plus the columns named in `sort`.
    order: iterable of position labels; the result follows this order.
    sort: column name or list of column names to sort each position's rows
      by. Defaults to ['Salary', 'FPTS'].
    incr: bool or list of bools, one per sort column, giving ascending
      (True) or descending (False) order. Defaults to [True, False], i.e.
      increasing salary, then decreasing FPTS.

  Returns:
    A list of DataFrames, one per entry of `order`. A position with no rows
    yields an empty DataFrame. df1 itself is not modified.
  """
  # None sentinels replace the original mutable list defaults.
  sort = ['Salary', 'FPTS'] if sort is None else sort
  incr = [True, False] if incr is None else incr
  # Sorting/re-indexing returns new frames instead of mutating a filtered
  # slice in place, so no chained-assignment warning is triggered.
  return [
    df1[df1['Position'] == pos]
    .sort_values(sort, ascending=incr)
    .reset_index(drop=True)
    for pos in order
  ]

In [None]:
# checkDuplicates takes in a lineup dict (slot -> player) or any iterable of
# players and checks for duplicate players
def checkDuplicates(players):
  """Return True if any player appears more than once.

  Args:
    players: a lineup dict mapping roster slot to player name, or any other
      iterable of hashable player names.

  Returns:
    True when at least one player is repeated, otherwise False.
  """
  # Iterating a dict yields its keys (the roster slots), which are unique by
  # construction; a lineup dict must therefore be checked through its values.
  if isinstance(players, dict):
    names = list(players.values())
  else:
    names = list(players)
  return len(set(names)) != len(names)

In [None]:
def getSingleScore(player, df1, param):
  """Return the `param` value from the first row of df1 whose Name is `player`.

  Raises IndexError when df1 has no row for the player.
  """
  player_rows = df1[df1['Name'] == player]
  first_row = player_rows.iloc[0]
  return first_row[param]

# getScore takes in a lineup dict (slot -> player) and returns the sum of the
# `param` column over its players
def getScore(lineup, df1, param):
  """Total the `param` value of every player in the lineup.

  Args:
    lineup: dict whose values are player names.
    df1: DataFrame passed through to getSingleScore for each player.
    param: name of the column to total (e.g. 'FPTS').

  Returns:
    The summed value; 0 for an empty lineup.
  """
  return sum(getSingleScore(name, df1, param) for name in lineup.values())

In [None]:
# checkSalary takes in a dict of players and the DataFrame holding their
# salaries, checks if the combined salary > cap (50000 by default)
# if yes return true, else return false
def checkSalary(players, df1=None, cap=50000):
  """Report whether a lineup's combined salary exceeds the cap.

  Args:
    players: lineup dict whose values are player names.
    df1: DataFrame with 'Name' and 'Salary' columns used to look salaries up.
      It is required; the None default exists only so that the old
      one-argument call keeps failing with TypeError, as it always did.
    cap: salary limit; defaults to 50000.

  Returns:
    True if the total salary is strictly greater than `cap`, else False.

  Raises:
    TypeError: if df1 is not supplied.
  """
  if df1 is None:
    # The original body called getSalary(players) without a DataFrame, which
    # could never succeed; fail with an explicit message instead.
    raise TypeError("checkSalary() requires df1, the DataFrame to read salaries from")
  return bool(getSalary(players, df1) > cap)

def getSingleSalary(player, df1):
  """Return the Salary on the first row of df1 whose Name is `player`.

  Raises IndexError when df1 has no row for the player.
  """
  return df1[df1['Name'] == player].iloc[0].Salary

# getSalary takes in a dict of players, returns the combined salary of all players
def getSalary(lineup, df1):
  """Sum the salaries of every player in the lineup (0 for an empty lineup)."""
  return sum(getSingleSalary(name, df1) for name in lineup.values())

In [None]:
def getPlayerDF(df1, player):
  """Return every row of df1 whose Name equals `player` (possibly none)."""
  is_player = df1['Name'] == player
  return df1.loc[is_player]

def getPlayerSingleGame(df1, date, player, position):
  """Return the rows of df1 matching the given Date, Name and Position.

  Position is part of the filter; matching on Date and Name alone would
  return one row per position the player is listed under.
  """
  on_date = df1['Date'] == date
  is_player = df1['Name'] == player
  in_slot = df1['Position'] == position
  return df1[on_date & is_player & in_slot]

# Greedily Selecting Lineups

In [None]:
def most_specific_position(p,remaining):
  """Pick the narrowest open roster slot that position `p` can fill.

  Preference order: the exact slot `p`; then "G" for PG/SG or "F" for SF/PF;
  then "UTIL".

  Args:
    p: the player's position label.
    remaining: collection of still-open slot labels.

  Returns:
    The chosen slot label, or None when no open slot accepts `p`.
  """
  if p in remaining:
    return p

  # Guards fall back to the G slot, forwards to the F slot; anything else
  # (e.g. "C") has no intermediate slot.
  if p in ("PG", "SG"):
    flex_slot = "G"
  elif p in ("SF", "PF"):
    flex_slot = "F"
  else:
    flex_slot = None

  for slot in (flex_slot, "UTIL"):
    if slot is not None and slot in remaining:
      return slot
  return None

In [None]:
def greedy_search(candidates,budget,vacancies,cheapest):
  """Build a lineup by taking candidates in row order whenever they are viable.

  Each row is placed in the most specific open slot it fits, but only if
  check_viability confirms the rest of the roster can still be completed.

  Args:
    candidates: DataFrame with Name, Position and Salary columns; rows are
      tried in the order given.
    budget: salary still available for the whole lineup.
    vacancies: mutable collection of open slot labels. It is modified in
      place: every filled slot is removed from it.
    cheapest: DataFrame forwarded to check_viability.

  Returns:
    dict mapping slot label to player name. It may be incomplete if the
    candidates run out before every vacancy is filled.
  """
  chosen = {}
  for _, row in candidates.iterrows():
    if not vacancies:  # lineup complete
      return chosen

    name = row.Name
    if name in chosen.values():
      continue  # already rostered through another of this player's rows

    # Most specific slot this row can take given what is still open.
    slot = most_specific_position(row.Position, vacancies)
    if slot is None:
      continue

    # Trial copy with the candidate placed; the viability check may add
    # further entries to it, so the real lineup is never handed over.
    trial = dict(chosen)
    trial[slot] = name
    if not check_viability(row, budget, vacancies, cheapest, trial):
      continue

    chosen[slot] = name
    vacancies.remove(slot)
    budget -= row.Salary
  return chosen

In [None]:
def check_viability(player, budget, vacancies, cheapest, lineup):
  """Decide whether adding `player` still leaves a completable roster.

  Args:
    player: row with Name, Position and Salary fields.
    budget: salary available before paying for `player`.
    vacancies: open slot labels; only a copy of it is modified.
    cheapest: DataFrame forwarded to make_cheapest_assignment.
    lineup: dict forwarded to make_cheapest_assignment, which may add
      entries to it.

  Returns:
    True if the player is affordable, fits an open slot, and
    make_cheapest_assignment reports the remaining slots can be filled with
    the leftover budget; otherwise False.
  """
  leftover = budget - player.Salary
  if leftover < 0:
    return False

  slot = most_specific_position(player.Position, vacancies)
  if slot is None:  # player doesn't fit in any open slot
    return False

  # Copy so the caller's vacancies are untouched by this trial run.
  open_slots = vacancies.copy()
  open_slots.remove(slot)
  viable, _ = make_cheapest_assignment(leftover, open_slots, cheapest, lineup)
  return bool(viable)

In [None]:
def make_cheapest_assignment(budget, vacancies, cheapest, lineup):
  """Try to fill every open slot by taking players in the row order of `cheapest`.

  Args:
    budget: salary available for the remaining slots.
    vacancies: mutable collection of open slot labels; filled slots are
      removed from it in place.
    cheapest: DataFrame with Name, Position and Salary columns. The early
      exit on an unaffordable player assumes the rows are in ascending
      Salary order.
    lineup: dict of slot label -> player name; assigned players are added
      to it in place.

  Returns:
    (viable, lineup): viable is True when no vacancy remains, False when
    the next unused player is unaffordable or the rows run out first.
  """
  for index, player in cheapest.iterrows():
    if not vacancies:
      break  # every slot is filled
    if player.Name in lineup.values():
      continue  # player already in lineup, ignore player
    if player.Salary > budget:
      # cheapest remaining player too expensive, can't make lineup
      return False, lineup
    pos = most_specific_position(player.Position, vacancies)
    if pos is None:
      continue
    budget = budget - player.Salary
    vacancies.remove(pos)
    lineup[pos] = player.Name
  # Decide after the loop as well: previously a lineup completed by the very
  # last row (or with nothing left to fill and no rows) was reported as failed.
  return len(vacancies) == 0, lineup

In [None]:
# Run-level accumulators: number of dates without a lineup, and one result
# row per successfully built lineup.
error_count, data = 0, []

# Roster slots to fill, the salary cap, and the column used to rank players.
positions = [
  "PG", "SG", "SF", "PF", "C",
  "G", "F",
  "UTIL",
]
budget = 50000
param = 'RidgeRegPred'

In [None]:
# Build one greedy lineup per date and collect the results in soln_df.
for date in dates:
  # All rows for this date, re-indexed from 0.
  currentPlayers = df[df['Date'] == date].reset_index(drop=True)

  # Candidates: highest `param` first, ties broken by the more specific slot.
  # (Alternative tie-break tried earlier: insert 'Name' before 'Pos'.)
  candidates = (
    currentPlayers
    .sort_values([param, 'Pos'], ascending=[False, True])
    .reset_index(drop=True)
  )

  # Cheapest pool: lowest salary first, ties broken by the more specific slot.
  cheapest = (
    currentPlayers
    .sort_values(['Salary', 'Pos'], ascending=[True, True])
    .reset_index(drop=True)
  )

  # Fresh set each date: greedy_search removes slots from it as it fills them.
  vacancies = set(positions)

  lineup = greedy_search(candidates, budget, vacancies, cheapest)
  total_salary = getSalary(lineup, currentPlayers)
  # The old `budget < 0` test could never fire, because greedy_search only
  # changes its own local copy of budget; check the salary actually spent.
  if len(lineup) != len(positions) or total_salary > budget:
    print('Error: Failed to create lineup')
    error_count += 1
    continue

  entry = lineup.copy()
  entry['Date'] = date
  entry['Salary'] = total_salary
  entry[param] = getScore(lineup, currentPlayers, param)
  entry['FPTS'] = getScore(lineup, currentPlayers, 'FPTS')

  data.append(entry)
soln_df = pd.DataFrame(data)

In [None]:
# without sorting
soln_df

Unnamed: 0,PF,SF,SG,C,PG,F,UTIL,G,Date,Salary,RidgeRegPred,FPTS
0,Giannis Antetokounmpo,Brandon Ingram,Khris Middleton,Bam Adebayo,Dennis Schroder,Juan Toscano-Anderson,Precious Achiuwa,Mychal Mulder,20201225,50000,303.816820,213.50
1,Domantas Sabonis,DeAndre Bembry,Joe Ingles,Karl-Anthony Towns,Russell Westbrook,Chuma Okeke,Damian Jones,Terry Rozier,20201226,50000,283.921521,221.00
2,Domantas Sabonis,Khris Middleton,Bradley Beal,Andre Drummond,Dante Exum,DeMar DeRozan,Robert Williams,Mychal Mulder,20201227,50000,295.878970,272.00
3,Kenrich Williams,Georges Niang,George Hill,Christian Wood,James Harden,Solomon Hill,Trae Young,Ja Morant,20201228,50000,307.519419,204.75
4,Domantas Sabonis,Jordan Nwora,Theo Maledon,Nikola Jokic,Russell Westbrook,Precious Achiuwa,Andre Drummond,TJ McConnell,20201229,50000,285.905853,282.00
...,...,...,...,...,...,...,...,...,...,...,...,...
167,Giannis Antetokounmpo,Solomon Hill,Khris Middleton,Clint Capela,Trae Young,Bobby Portis,Jeff Teague,Bryn Forbes,20210627,49900,240.707850,228.50
168,Dario Saric,Mikal Bridges,Paul George,Deandre Ayton,Chris Paul,Luke Kennard,DeMarcus Cousins,Devin Booker,20210628,49600,232.675729,230.75
169,Giannis Antetokounmpo,Jordan Nwora,Khris Middleton,Clint Capela,Jrue Holiday,Bobby Portis,Jeff Teague,Bogdan Bogdanovic,20210629,49900,242.570503,190.00
170,Dario Saric,Mikal Bridges,Paul George,Deandre Ayton,Chris Paul,Abdel Nader,Torrey Craig,Devin Booker,20210630,49800,222.910350,229.25


In [None]:
soln_df.FPTS.mean()

236.55959302325581

In [None]:
# with sorting
soln_df

Unnamed: 0,PF,SF,SG,C,PG,F,UTIL,G,Date,Salary,RidgeRegPred,FPTS
0,Giannis Antetokounmpo,Brandon Ingram,Khris Middleton,Bam Adebayo,Dennis Schroder,Juan Toscano-Anderson,Precious Achiuwa,Mychal Mulder,20201225,50000,303.816820,213.50
1,Domantas Sabonis,DeAndre Bembry,Joe Ingles,Karl-Anthony Towns,Russell Westbrook,Chuma Okeke,Damian Jones,Terry Rozier,20201226,50000,283.921521,221.00
2,Domantas Sabonis,Khris Middleton,Bradley Beal,Andre Drummond,Dante Exum,DeMar DeRozan,Robert Williams,Mychal Mulder,20201227,50000,295.878970,272.00
3,Kenrich Williams,Georges Niang,George Hill,Christian Wood,James Harden,Solomon Hill,Trae Young,Ja Morant,20201228,50000,307.519419,204.75
4,Domantas Sabonis,Jordan Nwora,Theo Maledon,Nikola Jokic,Russell Westbrook,Precious Achiuwa,Andre Drummond,TJ McConnell,20201229,50000,285.905853,282.00
...,...,...,...,...,...,...,...,...,...,...,...,...
167,Giannis Antetokounmpo,Solomon Hill,Khris Middleton,Clint Capela,Trae Young,Bobby Portis,Jeff Teague,Bryn Forbes,20210627,49900,240.707850,228.50
168,Dario Saric,Mikal Bridges,Paul George,Deandre Ayton,Chris Paul,Luke Kennard,DeMarcus Cousins,Devin Booker,20210628,49600,232.675729,230.75
169,Giannis Antetokounmpo,Jordan Nwora,Khris Middleton,Clint Capela,Jrue Holiday,Bobby Portis,Jeff Teague,Bogdan Bogdanovic,20210629,49900,242.570503,190.00
170,Dario Saric,Mikal Bridges,Paul George,Deandre Ayton,Chris Paul,Abdel Nader,Torrey Craig,Devin Booker,20210630,49800,222.910350,229.25


In [None]:
soln_df.FPTS.mean()

236.59447674418604

In [None]:
# Write the lineups to greedy<param>.csv with '\n' line endings and no index column.
# pandas 1.5 renamed `line_terminator` to `lineterminator`; the old spelling
# was removed in pandas 2.0 and raises TypeError there.
soln_df.to_csv(f'greedy{param}.csv', lineterminator='\n', index=False)

In [None]:
candidates