<a href="https://colab.research.google.com/github/MarrtinJ/fantasy-bball-opt/blob/main/naiveBaseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# naiveBaseline.ipynb
Sets a naive baseline for selecting an optimal fantasy basketball team: for each date, it randomly generates n valid lineups and records their average fantasy score.

# Installing/Importing Necessary Libraries

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Silence every warning for the rest of the session. The target is the pandas
# indexing/slicing warning, but this filter is not limited to it.
import warnings
warnings.simplefilter('ignore')

In [None]:
# Load the dataset from a CSV file expected in the working directory.
df = pd.read_csv('dataset2.csv')
# Show (rows, columns) as a quick sanity check on the load.
df.shape

(43482, 27)

In [None]:
# Every distinct date in the dataset; the lineup loop further down walks over these.
dates = df['Date'].unique()

# Keep only the columns the baseline uses and renumber the rows from 0.
df = df[['Date', 'Name', 'Team', 'Position', 'Salary', 'FPTS']].reset_index(drop=True)

# Creating DFs for each position

In [None]:
def buildPositionDFs(df1):
  """Split a player table into one sorted table per lineup slot.

  Args:
    df1: DataFrame with at least 'Position', 'Salary' and 'FPTS' columns.
      It is not modified.

  Returns:
    A list of eight DataFrames, in the order
    [PG, SG, SF, PF, C, G, F, UTIL]. Each holds the rows of `df1` whose
    'Position' equals that label, sorted by increasing 'Salary' with ties
    broken by decreasing 'FPTS', and re-indexed from 0. A label with no
    matching rows yields an empty DataFrame.
  """
  slots = ['PG', 'SG', 'SF', 'PF', 'C', 'G', 'F', 'UTIL']
  # One flag per sort key: Salary ascending, FPTS descending.
  ascending = [True, False]

  # sort_values/reset_index return new frames, so nothing is mutated in place
  # on a filtered slice (the source of pandas' SettingWithCopyWarning).
  return [
      df1[df1['Position'] == slot]
      .sort_values(by=['Salary', 'FPTS'], ascending=ascending)
      .reset_index(drop=True)
      for slot in slots
  ]

# DFS Setup / Helper Functions

In [None]:
def checkDuplicates(players):
  """Return True if any entry of `players` appears more than once, else False.

  Stops at the first repeated entry.
  """
  seen = set()
  # `seen.add` returns None, so the expression is truthy only for an entry
  # already recorded; otherwise the entry is recorded and the scan continues.
  return any(name in seen or seen.add(name) for name in players)

In [None]:
def checkSalary(players):
  """Return True when the combined salary of `players` is above 50000.

  Note the polarity: True means the lineup is over the limit, False means it
  is at or under it. The total comes from getSalary.
  """
  return bool(getSalary(players) > 50000)

def getSalary(players, roster=None):
  """Return the combined salary of every name in `players`.

  Args:
    players: iterable of player names. A name listed twice is counted twice.
    roster: DataFrame with 'Name' and 'Salary' columns to look names up in.
      Defaults to the module-level `currentPlayers`, which is what the
      function always used before this argument existed.

  Returns:
    The sum of the salaries, taking the first matching row for each name;
    0 for an empty `players`.

  Raises:
    IndexError: if a name has no row in the roster.
  """
  if roster is None:
    # Resolved at call time, so the value set by the current loop iteration is used.
    roster = currentPlayers
  return sum(
      roster.loc[roster['Name'] == player, 'Salary'].iloc[0]
      for player in players
  )

In [None]:
def getSingleScore(player, roster=None):
  """Return the 'FPTS' value of the first roster row whose 'Name' is `player`.

  Args:
    player: player name to look up.
    roster: DataFrame with 'Name' and 'FPTS' columns. Defaults to the
      module-level `currentPlayers`, as before this argument existed.

  Raises:
    IndexError: if no row matches `player`.
  """
  if roster is None:
    # Resolved at call time, so the value set by the current loop iteration is used.
    roster = currentPlayers
  return roster.loc[roster['Name'] == player, 'FPTS'].iloc[0]

def getSingleSalary(player, roster=None):
  """Return the 'Salary' value of the first roster row whose 'Name' is `player`.

  Args:
    player: player name to look up.
    roster: DataFrame with 'Name' and 'Salary' columns. Defaults to the
      module-level `currentPlayers`, as before this argument existed.

  Raises:
    IndexError: if no row matches `player`.
  """
  if roster is None:
    # Resolved at call time, so the value set by the current loop iteration is used.
    roster = currentPlayers
  return roster.loc[roster['Name'] == player, 'Salary'].iloc[0]

In [None]:
def getScore(players):
  """Return the total fantasy points of `players`.

  Each name is looked up with getSingleScore and the results are added
  together; an empty `players` gives 0.
  """
  return sum(getSingleScore(name) for name in players)

# Randomly Generating Lineups

In [None]:
# Number of valid lineups averaged per date. It keeps this value after the loop,
# where it is used to name the output CSV.
numLineups = 100
# Upper bound on sampling attempts per date, so a date that cannot yield enough
# valid lineups ends the search instead of looping forever.
maxAttempts = 100000
# Seeded generator so that Restart & Run All reproduces the same baseline.
rng = np.random.RandomState(0)

# One [date, mean score] pair per date.
data = []

for date in dates:
  curDay = df[df['Date'] == date]

  # One row per (player, listed position): a 'PG/SG' player becomes a PG row
  # and an SG row; single-position players keep their one row.
  listed = curDay.assign(Position=curDay['Position'].str.split('/'))
  listed = listed.explode('Position').reset_index(drop=True)

  # PG/SG rows are re-labelled 'G' and SF/PF rows 'F', one row per name.
  Gs = (listed[listed['Position'].isin(['PG', 'SG'])]
        .drop_duplicates(subset='Name')
        .assign(Position='G'))
  Fs = (listed[listed['Position'].isin(['SF', 'PF'])]
        .drop_duplicates(subset='Name')
        .assign(Position='F'))
  # Every row of the day is also offered under the 'UTIL' label.
  Utils = curDay.assign(Position='UTIL')

  # Kept at module level on purpose: getSalary, getSingleScore and
  # getSingleSalary read `currentPlayers` when they look a player up.
  currentPlayers = pd.concat([listed, Gs, Fs, Utils], ignore_index=True)
  currentPlayers = currentPlayers.sort_values(
      ['Name', 'Team', 'Position'], na_position='first', ignore_index=True)

  # Eight pools in slot order: PG, SG, SF, PF, C, G, F, UTIL.
  l1 = buildPositionDFs(currentPlayers)

  scores = []
  attempts = 0
  while len(scores) < numLineups and attempts < maxAttempts:
    attempts += 1
    # Draw one name per slot.
    lineup = [pool['Name'].sample(random_state=rng).iloc[0] for pool in l1]

    # The same player appears in several pools (e.g. PG, G and UTIL), so a
    # draw can repeat a name; such a lineup is not valid. checkSalary returns
    # True when the lineup is over the salary limit.
    if checkDuplicates(lineup) or checkSalary(lineup):
      continue

    # Score only lineups that passed both checks.
    scores.append(getScore(lineup))

  if len(scores) < numLineups:
    print(f'{date}: only {len(scores)} valid lineups found in {attempts} attempts')

  scores = np.array(scores)
  # Guard the empty case explicitly rather than relying on mean() of an empty array.
  avgScore = scores.mean() if len(scores) else np.nan
  # Print just the date and its mean; the full score array is too long to read.
  print(date, avgScore)
  data.append([date, avgScore])

20191022 [115.   205.   208.5  189.75 136.25 178.25 198.75 210.75 228.5  144.
 138.75 187.75 142.75 180.25 160.75 219.5  133.75 156.   215.75 182.75
 137.5  172.75 153.5  158.75 157.75 201.   216.   255.5  205.75 159.5
 184.75 150.25 234.5  169.   169.25 217.25 137.   163.   245.   121.
 149.5  194.25 154.25 210.5  152.25 191.75 234.5  161.25 180.5  177.75
 150.75 188.   158.   205.75 126.25 181.5  173.25 188.25 144.25 196.
 185.25 161.25 158.5  128.5  189.5  196.25 202.5  206.5  188.   216.25
 196.5  186.25 218.5  170.5  135.75 169.25 151.   217.25 196.25 116.5
 133.   176.5  145.75 156.5  186.25 174.5  156.75 177.   176.75 120.
 156.   174.25 246.25 210.25 189.5  142.75 134.25 229.5  198.25 125.25] 176.5975
20191023 [240.75 171.   167.25 107.75 152.75 137.75 171.5  130.75 192.5  146.25
 188.25 117.5  106.5  166.75 170.75 188.75 135.   165.5  109.   228.
 113.25 160.25 129.25  98.75 143.5  164.   209.25 227.5  169.5  167.75
 165.5  165.25 176.25 196.25 162.25 265.5  234.25 174.75 203.

In [None]:
# Inspect the player pool left in `currentPlayers` by the final iteration of the lineup loop.
currentPlayers

Unnamed: 0,Date,Name,Team,Position,Salary,FPTS
0,20210701,Bobby Portis,Mil,C,4500,43.00
1,20210701,Bobby Portis,Mil,F,4500,43.00
2,20210701,Bobby Portis,Mil,PF,4500,43.00
3,20210701,Bobby Portis,Mil,UTIL,4500,43.00
4,20210701,Bogdan Bogdanovic,Atl,G,5100,43.75
...,...,...,...,...,...,...
75,20210701,Solomon Hill,Atl,SF,3000,0.00
76,20210701,Solomon Hill,Atl,UTIL,3000,0.00
77,20210701,Thanasis Antetokounmpo,Mil,F,3000,2.00
78,20210701,Thanasis Antetokounmpo,Mil,PF,3000,2.00


In [None]:
# Collect the [date, mean score] pairs gathered in `data` into a two-column frame.
naive = pd.DataFrame(data, columns =['Date', 'AvgFPTS'])

In [None]:
# Display the per-date baseline averages.
naive

Unnamed: 0,Date,AvgFPTS
0,20191022,176.5975
1,20191023,158.4375
2,20191024,166.5300
3,20191025,155.5550
4,20191026,159.1825
...,...,...
351,20210627,190.3050
352,20210628,136.7475
353,20210629,126.6225
354,20210630,144.9025


In [None]:
# Write the baseline to disk; the file name carries the lineup count used per date.
# `line_terminator` was renamed to `lineterminator` in pandas 1.5, and the old
# spelling was removed in pandas 2.0, where passing it raises a TypeError.
naive.to_csv(f'naive{numLineups}.csv', lineterminator='\n', index=False)