In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
from bs4 import BeautifulSoup
from unidecode import unidecode
from collections import Counter
from datetime import datetime, timedelta
from team_maker import *

%load_ext autoreload
%autoreload 2
%matplotlib inline

In [5]:
# Roster slots to fill; a position is listed once per slot it occupies
# (three 'OF' entries, two 'P' entries).
positions = ['C', '1B', '2B', 'SS', '3B', 'OF', 'OF', 'OF', 'P', 'P']

# Raw player table read from the local salaries CSV.
df = pd.read_csv('DKSalaries.csv')
# df['PPK'] = df.AvgPointsPerGame / df.Salary


# Disabled scratch lines. NOTE(review): `baselist` is only a local variable
# inside choose_top_players_pos, so these could not run at cell level as-is.
# eff = pd.concat(baselist)
# eff.AvgPointsPerGame.sum()
# eff.sort_values('AvgPointsPerGame')

In [26]:
# NOTE(review): no active code in the cells above assigns `eff` (its
# construction is commented out), so this relies on leftover kernel state
# unless `team_maker` exports it - confirm before a Restart & Run All.
eff.AvgPointsPerGame.sum()

105.0

In [110]:
def get_pitcher_adjustments():
    """
    Scrapes pitcher data from Baseball Prospectus and returns a dataframe
    with a multiplier for each pitcher based on their WHIP.

    WHIP is computed here as (H + BB + HBP) / IP. The multiplier 'MULT' is
    the pitcher's WHIP divided by the mean WHIP of the pitchers kept after
    the games-played filter, so a value above 1 means a higher-than-average
    WHIP.

    returns a dataframe with the columns 'NAME' and 'MULT'

    raises ValueError if the page does not contain the expected table
    """
    url = 'https://legacy.baseballprospectus.com/sortable/index.php?cid=2508773'

    # A timeout keeps a stalled connection from hanging the notebook forever
    rq = requests.get(url, timeout=30).content
    souped = BeautifulSoup(rq, 'html.parser')
    ttdata = souped.find('table', {'id' : 'TTdata'})
    rows = ttdata.find_all('tr') if ttdata is not None else []
    if not rows:
        raise ValueError("No rows found in table 'TTdata' at " + url)

    # The first row holds the column names, every later row one pitcher
    hdrs = [x.text for x in rows[0].find_all('td')]
    data = [[x.text for x in r.find_all('td')] for r in rows[1:]]

    pf = pd.DataFrame(data, columns = hdrs)

    # Every column from the fourth onward is converted to a numeric dtype
    for header in list(pf)[3:]:
        pf[header] = pd.to_numeric(pf[header])

    # Keep pitchers whose games played exceed (mean - one standard deviation).
    # .copy() makes the filtered frame independent, so the column
    # assignments below do not write into a slice of the original.
    pf = pf[pf['G'] > (pf['G'].mean() - pf['G'].std())].copy()

    pf['WHIP'] = (pf['H'] + pf['BB'] + pf['HBP']) / pf['IP']
    pf['MULT'] = pf['WHIP'] / pf['WHIP'].mean()
    return pf[['NAME', 'MULT']]

def get_projected_players():
    """
    Scrapes the projected lineups from
    https://rotogrinders.com/lineups/mlb?site=draftkings

    returns a flat list of player names; for every game block the two
    pitchers come first (home, then away), followed by the away lineup
    and then the home lineup
    """

    def pitcher_name(team_div):
        # Name shown in the pitcher box of one team's block
        box = team_div.find('div', {'class' : 'pitcher players'})
        return box.find('a', {'class' : 'player-popup'}).text

    def lineup_names(team_div):
        # Names listed in one team's 'players unconfirmed' list
        lineup = team_div.find('ul', {'class' : 'players unconfirmed'})
        entries = lineup.findAll('li', {'class' : 'player'})
        return [entry.find('span', {'class' : 'pname'}).text.strip() for entry in entries]

    page = requests.get('https://rotogrinders.com/lineups/mlb?site=draftkings').content
    soup = BeautifulSoup(page, 'html.parser')

    players = []
    for game in soup.findAll('div', {'class' : 'blk game'}):
        home = game.find('div', {'class' : 'blk home-team'})
        away = game.find('div', {'class' : 'blk away-team'})

        players += [pitcher_name(home), pitcher_name(away)]
        players += lineup_names(away)
        players += lineup_names(home)

    return players

def choose_top_players_pos(df, positions):
    """
    df = DataFrame of all available players, already sorted so that
         the preferred players come first; must have a
         'Roster Position' column
    positions = list of strings, positions to be staffed,
                each position should be in the list the number
                of times it is in the roster
                
    returns a dataframe of the top n players for each position, 
    where n is the number of times the position appears in the list.
    The result is indexed by 'Roster Position'.

    raises KeyError if a requested position has no player in df
    """
    df = df.set_index(['Roster Position'])
    
    # Getting counter dict of positions
    pos_count = Counter(positions)
    
    # Creating a list of mini dfs with top players by position.
    # Selecting with a list of labels always yields a DataFrame, even when
    # only one player has that position; a bare label would collapse that
    # case to a Series and corrupt the concatenated result.
    baselist = [df.loc[[pos]].iloc[:count] for pos, count in pos_count.items()]
   
    # Making one big dataframe and concatenating
    return pd.concat(baselist)

def choose_top_players_flex(df, num):
    """
    df = DataFrame of all available players, in preference order
    num = int, number of players to choose
    
    returns a dataframe of the first `num` players that are not on the
    baseball injury report
    """
    
    # Names currently listed on the injury report
    injured = get_injured_players('baseball')

    # Mask of the players that are NOT injured
    is_healthy = ~df['Name'].isin(injured)
    
    # First `num` rows of the healthy players
    return df[is_healthy][:num]

def get_injured_players(sport):
    """
    sport = str, the targeted sport (only 'baseball' is configured)

    returns a list of names taken from the injury report page
    """
    
    # Injury report page per sport
    report_pages = {
        'baseball' : 'https://scores.nbcsports.com/mlb/stats.asp?file=inj'
    }
    
    # Downloading and parsing the injury report
    response = requests.get(report_pages[sport])
    cells = BeautifulSoup(response.content, 'html.parser').find_all('td')
    
    # Every third cell starting at the sixth is inspected
    cell_texts = (cells[k].getText() for k in range(5, len(cells), 3))
    
    # Only cells containing a comma are used; the part before the comma is
    # kept and transliterated to ASCII to match the source formatting
    return [unidecode(text.split(',')[0]) for text in cell_texts if ',' in text]
    

def make_efficient_team(df, positions = None, num = None):
    """
    df = DataFrame of all available players, must have a 'PPK' column
    positions = list of strings, roster positions to staff
    num = int, number of players to pick when positions is not given
    
    Returns a dataframe of a team with the best ratio of 
    average points to cost. `positions` takes precedence over `num`;
    returns None when neither is given.
    """
    ranked = df.sort_values('PPK', ascending=False)
    
    # Identity checks: `!= None` is evaluated elementwise by array-like
    # arguments and is not a reliable "was it passed" test
    if positions is not None:
        return choose_top_players_pos(ranked, positions)
    if num is not None:
        return choose_top_players_flex(ranked, num)
    return None
        

def make_best_team(df, positions = None, num = None):
    """
    df = DataFrame of all available players, must have an
         'AvgPointsPerGame' column
    positions = list of strings, roster positions to staff
    num = int, number of players to pick when positions is not given
    
    Returns a dataframe of a team with the best average points.
    `positions` takes precedence over `num`; returns None when neither
    is given.
    """
    ranked = df.sort_values('AvgPointsPerGame', ascending=False)
    
    # Identity checks: `!= None` is evaluated elementwise by array-like
    # arguments and is not a reliable "was it passed" test
    if positions is not None:
        return choose_top_players_pos(ranked, positions)
    if num is not None:
        return choose_top_players_flex(ranked, num)
    return None
    
def make_team_w_pos(positions, salary_cap = 50000):
    """
    positions = list of strings, positions to be staffed,
                each position should be in the list the number
                of times it is in the roster
    salary_cap = maximum total salary of the returned team
                 (defaults to 50000, the previously hard-coded value)
    
    returns a dataframe of a team formed with the efficient to
    best method. Starts with the most efficient players for 
    their price, then swaps in the best players, regardless of price, 
    slot by slot while keeping the whole team's price under the cap.

    Prints the salary and point totals of the result and of the
    uncapped best team.
    """
   
    df = prep_dataframe()
    
    eff = make_efficient_team(df, positions = positions)
    best = make_best_team(df, positions = positions)
    
    # 'DEFICIT' = The expectation of best - efficient player.
    # Both teams are built from the same positions list, so row k of
    # each frame refers to the same roster slot.
    eff['DEFICIT'] = best.AvgPointsPerGame - eff.AvgPointsPerGame
    best['DEFICIT'] = best.AvgPointsPerGame - eff.AvgPointsPerGame
    
    eff = eff.sort_values('DEFICIT', ascending=False).reset_index() 
    best = best.sort_values('DEFICIT', ascending=False).reset_index()
    
    # Replacing efficient players with best players in deficit order
    for i, e in eff.iterrows():

        # Team cost with the current player of this slot taken out
        ctminus = eff.Salary.sum() - e.Salary

        # Swap only if the upgrade fits under the cap and the player is
        # not already on the team
        fits_cap = ctminus + best.Salary.loc[i] <= salary_cap
        if fits_cap and best.Name.loc[i] not in eff.Name.unique():
            eff.loc[i] = best.loc[i]
            
    print('Salary:', eff.Salary.sum())
    print('Avg Points:', eff.AvgPointsPerGame.sum())
    print('Avg w/o cap:', best.AvgPointsPerGame.sum())
    print('Sal w/o cap:', best.Salary.sum())
    
    return eff

def make_team_flex(num, salary_cap = 50000):
    """
    num = int, number of players to include on the final roster
    salary_cap = maximum total salary of the returned team
                 (defaults to 50000, the previously hard-coded value)
    
    returns a tuple (eff, best): `eff` is the team formed with the
    efficient to best method, `best` is the pool of top scorers the
    upgrades were drawn from. Starts with the most efficient players
    for their price, then swaps in better players, regardless of price, 
    while keeping the whole team's price under the cap.

    Prints the salary and point totals of the result and of the
    top `num` players of the pool.
    """
    
    df = prep_dataframe()
    
    # num quadrupled for 'best' to give more options
    eff = make_efficient_team(df, num =num).reset_index()
    best = make_best_team(df, num = num*4).reset_index()
    
    # 'DEFICIT' = The expectation of best - efficient player
    eff['DEFICIT'] = best.AvgPointsPerGame - eff.AvgPointsPerGame
    best['DEFICIT'] = best.AvgPointsPerGame - eff.AvgPointsPerGame
    
    eff = eff.sort_values('DEFICIT', ascending=False).reset_index() 
    best = best.sort_values('AvgPointsPerGame', ascending=False).reset_index()
    
    # Replacing efficient players one at a time while staying under cap
    for i, e in eff.iterrows():

        # `eff` only changes right before the inner loop is left, so the
        # cost without this player and the current roster names are
        # computed once per slot instead of once per candidate
        ctminus = eff.Salary.sum() - e.Salary
        taken = eff.Name.unique()
        
        for j, r in best.iterrows():

            # First candidate (in descending points order) that fits the
            # cap, is not on the team yet and scores more than `e`
            if ctminus + r['Salary'] <= salary_cap \
            and r['Name'] not in taken \
            and e.AvgPointsPerGame < r.AvgPointsPerGame:
                
                eff.loc[i] = best.loc[j]
                break
            
    print('Salary:', eff.Salary.sum())
    print('Avg Points:', eff.AvgPointsPerGame.sum())
    print('Avg w/o cap:', best.AvgPointsPerGame[:num].sum())
    print('Sal w/o cap:', best.Salary[:num].sum())
    
    return eff, best

def prep_dataframe():
    """
    Reads DKSalaries.csv and prepares it for the team building functions.

    Adds an 'OpposingTeam' column (parsed from 'Game Info'), a 'PPK'
    column (average points per 1000 of salary) and keeps only the
    players returned by get_projected_players().
    """
    df = pd.read_csv('DKSalaries.csv')
    
    # 'Game Info' starts with '<team>@<team> ...'; pull out both teams
    split_infos = (info.split('@') for info in df['Game Info'])
    matchups = [(parts[0], parts[1].split(' ')[0]) for parts in split_infos]

    # The opponent is whichever side of the matchup is not the player's team
    df['OpposingTeam'] = [
        second if first == team else first
        for (first, second), team in zip(matchups, df.TeamAbbrev)
    ]

    df['PPK'] = df.AvgPointsPerGame / df.Salary * 1000

    # Restricting the pool to players in a projected lineup
    projected = get_projected_players()
    return df[df['Name'].isin(projected)]

def clear_dead_weight(df):
    """
    df = DataFrame of players whose index holds the position labels
         (presumably 'Roster Position' - confirm against the caller);
         needs the columns 'PPK' and 'AvgPointsPerGame'

    For every index label shorter than three characters, takes the
    AvgPointsPerGame of the player with the highest PPK as a floor and
    keeps only the players at or above that floor.

    returns the kept players of all such labels as one dataframe, or an
    empty dataframe if no label qualifies
    """
    
    df = df.sort_values('PPK', ascending=False)
    kept = []
    
    for pos in [x for x in df.index.unique() if len(x) < 3]:
        # List selection keeps a DataFrame even for a single matching row
        tdf = df.loc[[pos]]

        # Rows are in descending PPK order, so the first row is the most
        # efficient player of this position
        floor = tdf.AvgPointsPerGame.iloc[0]
        kept.append(tdf[tdf['AvgPointsPerGame'] >= floor])
    
    # Concatenate once at the end; pd.concat rejects an empty list
    if not kept:
        return pd.DataFrame()
    return pd.concat(kept)

def create_active_player_filter():
    """
    Scrapes https://www.mlb.com/starting-lineups and returns a list of
    player names: the starting pitchers of every matchup first, followed
    by the players listed in the home and away lineups.
    """
    rq = requests.get('https://www.mlb.com/starting-lineups').content
    souped = BeautifulSoup(rq, 'html.parser')

    # One block per game
    divs = souped.findAll("div", {"class": "starting-lineups__matchup"})

    avails = pd.DataFrame()
    all_sps = []

    for div in divs:
        # Team names of the home and away side
        htm = div.find("span", {"class": "starting-lineups__team-name starting-lineups__team-name--home"}).text.strip()
        atm = div.find("span", {"class": "starting-lineups__team-name starting-lineups__team-name--away"}).text.strip()

        # Lineup lists of the home and away side
        htmlu = div.find("ol", {"class": "starting-lineups__team starting-lineups__team--home"})
        atmlu = div.find("ol", {"class": "starting-lineups__team starting-lineups__team--away"})

        htmbs = htmlu.findAll("li", {"class": "starting-lineups__player"})
        atmbs = atmlu.findAll("li", {"class": "starting-lineups__player"})

        # Starting pitcher names of this matchup
        sps = div.findAll("div", {"class" : "starting-lineups__pitcher-name"})
        all_sps.extend([x.text.strip() for x in sps])

        # Each lineup entry is split on spaces into positional columns;
        # column 4 is then set to the team name (created, or overwritten if
        # an entry produced a fifth token).
        # NOTE(review): presumably an entry reads 'First Last (bats) POS' -
        # confirm against the page markup.
        htmbs = pd.DataFrame([x.text.split(' ') for x in htmbs])
        htmbs[4] = htm
        atmbs = pd.DataFrame([x.text.split(' ') for x in atmbs])
        atmbs[4] = atm

        avails = pd.concat([avails, htmbs, atmbs])

    # Requires the combined frame to have exactly five columns
    avails.columns=['fname', 'lname', 'b', 'pos', 'team']

    avails['name'] = avails.fname + ' ' + avails.lname

    avails = avails[['name', 'b', 'pos', 'team']]

    anames = []

    # A third token of 'Jr.' is treated as part of the name and appended
    for i, x in avails.iterrows():
        if x['b'] == 'Jr.':
            anames.append(x['name'] + ' ' +  x['b'])  
        else:
            anames.append(x['name'])

    return all_sps + anames

def make_team_w_pos_ratchet(positions, method = 'points', df = None, salary_cap = 50000):
    """
    positions = list of strings, positions to be staffed,
                each position should be in the list the number
                of times it is in the roster
    method = 'points' to give up the fewest average points per swap,
             'ratio' to give up the fewest average points per unit of
             salary saved
    df = DataFrame of available players; built with prep_dataframe()
         when omitted
    salary_cap = maximum total salary of the returned team
                 (defaults to 50000, the previously hard-coded value)
    
    returns a dataframe of a team formed with the ratchet method.
    Starts with the best players regardless of price, then, while the
    team is over the cap, swaps one player at a time for the highest
    scoring cheaper player with the same 'Position', always choosing
    the swap with the smallest loss.

    raises ValueError for an unknown method, or when the team is over
    the cap and no cheaper replacement exists for any roster slot
    """
    
    if method not in ('points', 'ratio'):
        raise ValueError("method must be 'points' or 'ratio', got %r" % (method,))

    # `df == None` is evaluated elementwise on a DataFrame and cannot be
    # used in an `if`; test identity instead
    if df is None:
        df = prep_dataframe()
        
    # Descending points order makes the first match below the best alternative
    df = df.sort_values('AvgPointsPerGame', ascending=False)
    
    best = make_best_team(df, positions = positions)
    
    swaps = 0
    
    while best.Salary.sum() > salary_cap:
        
        swaps += 1
        print('swaps:', swaps)

        # Players that are not on the team yet
        rmdf = df[~df['Name'].isin(best.Name)]
        
        loss = []
        alts = []
        for i, r in best.iterrows():
            
            # Cheaper players with the same position as this roster slot
            tdf = rmdf[rmdf['Position'] == r['Position']]
            tdf = tdf[tdf['Salary'] < r['Salary']]
            
            if len(tdf) > 0:
                alt = tdf.iloc[0]
                ns, na = alt[['Salary', 'AvgPointsPerGame']]
                old_salary, oa = r[['Salary', 'AvgPointsPerGame']]

                if method == 'points':
                    ls = oa - na
                else:
                    # old_salary > ns is guaranteed by the filter above
                    ls = (oa - na) / (old_salary - ns)
                
                loss.append(ls)
                alts.append(alt)
            else:
                # No replacement for this slot: never the preferred swap
                loss.append(np.inf)
                alts.append(None)
        
        rep = int(np.argmin(loss))
        if alts[rep] is None:
            # Every slot is out of alternatives; writing None into the
            # roster would only hide that the cap cannot be met
            raise ValueError('No cheaper replacement available to get under the salary cap')
        best.iloc[rep] = alts[rep]
    
    print('Avg points:', best.AvgPointsPerGame.sum())
    print('Salary:', best.Salary.sum())
    
    return best


In [5]:
# Roster slots to fill; a position is listed once per slot it occupies.
positions = ['C', '1B', '2B', 'SS', '3B', 'OF', 'OF', 'OF', 'P', 'P']

# Earlier scratch experiments that lived in this cell (manual efficient/best
# swap loop, make_team_w_pos / make_team_flex calls) were disabled; only the
# ratchet run below is active.

# Build a roster with the ratchet method using the 'ratio' loss and export it.
# NOTE(review): export_team is not defined in the visible cells - presumably
# it comes from `from team_maker import *`; confirm.
df = make_team_w_pos_ratchet(positions, method = 'ratio')
export_team(df)

AttributeError: 'NoneType' object has no attribute 'sort_values'

In [2]:
# Disabled experiment: loss in average points per 100 of salary between
# consecutive pitchers of the top 15.
# var = []
# tdf = df.loc['P'].sort_values('AvgPointsPerGame', ascending=False)

# for i in range(len(tdf[:15]) - 1):
#     apg,salary = tdf.iloc[i][['AvgPointsPerGame', 'Salary']]
#     apg1,salary1 = tdf.iloc[i+1][['AvgPointsPerGame', 'Salary']]
#     div = salary - salary1 / 100
#     loss = (apg - apg1) / ((salary - salary1) / 100)

#     var.append(loss)
# var

# Requires `positions` to be defined by an earlier cell in this kernel
# session; run on its own it fails with a NameError.
make_team_w_pos_ratchet(positions)

NameError: name 'positions' is not defined

In [398]:
# 13 * 15 * 1 * 1 * 16 * 7 * 15
# df.loc[df.index.unique()[5:]]
# NOTE(review): `tdf` is only assigned in disabled code or inside functions
# in the visible cells, so this display relies on leftover kernel state.
tdf
# df[((df['Position'] == 'OF') & (df['Salary'] <= 6000))]
# df.set_index('Roster Position', inplace=True)
# df.loc["P"]

Unnamed: 0_level_0,Position,Name + ID,Name,ID,Salary,Game Info,TeamAbbrev,AvgPointsPerGame,PPK
Roster Position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
P,SP,Chris Sale (11020015),Chris Sale,11020015,13500,NYY@BOS 08/03/2018 07:10PM ET,BOS,28.68,2.124444
P,SP,Gerrit Cole (11020018),Gerrit Cole,11020018,12800,HOU@LAD 08/03/2018 10:10PM ET,HOU,26.57,2.075781
P,SP,Max Scherzer (11020017),Max Scherzer,11020017,13000,CIN@WAS 08/03/2018 07:05PM ET,WAS,26.43,2.033077
P,SP,Justin Verlander (11019167),Justin Verlander,11019167,13100,HOU@LAD 08/03/2018 10:10PM ET,HOU,26.18,1.998473
P,SP,Trevor Bauer (11020016),Trevor Bauer,11020016,13300,LAA@CLE 08/03/2018 07:10PM ET,CLE,25.50,1.917293
P,SP,Jacob deGrom (11019168),Jacob deGrom,11019168,12400,ATL@NYM 08/03/2018 07:10PM ET,NYM,24.73,1.994355
P,SP,Tommy Milone (11020427),Tommy Milone,11020427,4000,CIN@WAS 08/03/2018 07:05PM ET,WAS,23.20,5.800000
P,SP,James Paxton (11020034),James Paxton,11020034,9600,TOR@SEA 08/03/2018 10:10PM ET,SEA,22.87,2.382292
P,SP,Luis Severino (11019172),Luis Severino,11019172,9200,NYY@BOS 08/03/2018 07:10PM ET,NYY,22.86,2.484783
P,SP,Charlie Morton (11020025),Charlie Morton,11020025,11100,HOU@LAD 08/03/2018 10:10PM ET,HOU,22.51,2.027928


In [64]:
# Total salary of the `eff` team.
# NOTE(review): `eff` is not assigned by active cell-level code in the
# visible cells - relies on kernel state.
eff.Salary.sum()

49800

In [65]:
# Sum of average points over the `best` team.
# NOTE(review): `best` is not assigned by active cell-level code in the
# visible cells - relies on kernel state.
best.AvgPointsPerGame.sum()

143.88

['Jake Lamb',
 'Shelby Miller',
 'Jarrod Dyson',
 'Deven Marrero',
 'Taijuan Walker',
 'Sam Freeman',
 'Peter Moylan',
 'Arodys Vizcaíno',
 'Brandon McCarthy',
 "Darren O'Day",
 'Mike Soroka',
 'José Ramírez',
 'Jacob Lindgren',
 'Grant Dayton',
 'Steve Wilkerson',
 'Craig Gentry',
 'Richard Bleier',
 'Pedro Araujo',
 'Gabriel Ynoa',
 'Rafael Devers',
 'Chris Sale',
 'Eduardo Rodriguez',
 'Christian Vázquez',
 'Steven Wright',
 'Dustin Pedroia',
 'Carson Smith',
 'Marco Hernández',
 'Austin Maddox',
 'Kris Bryant',
 'Anthony Bass',
 'Brandon Morrow',
 'Justin Hancock',
 'Yu Darvish',
 'Drew Smyly',
 'Nate Jones',
 'Danny Farquhar',
 'Miguel González',
 'Jesse Winker',
 'Jackson Stephens',
 'Alex Blandino',
 'Scott Schebler',
 'Rookie Davis',
 'Tyler Naquin',
 'Josh Tomlin',
 'Lonnie Chisenhall',
 'Evan Marshall',
 'Andrew Miller',
 'Nick Goody',
 'Danny Salazar',
 'Cody Anderson',
 'Chris Rusin',
 'Mike Dunn',
 'Chad Bettis',
 'Rayan González',
 'Michael Fulmer',
 'Miguel Cabrera',
 'D

['Homer Bailey',
 'Noah Syndergaard',
 'Kyle Gibson',
 'Trevor Bauer',
 'Luke Weaver',
 'Wei-Yin Chen',
 'Wade LeBlanc',
 'Martin Perez',
 'Lance Lynn',
 'Dylan Covey',
 'Cole Hamels',
 'Jakob Junis',
 'Joe Musgrove',
 'Kyle Freeland',
 'Jake Arrieta',
 'Zack Godley',
 'Matthew Boyd',
 'Nick Tropeano',
 'Charlie Morton',
 'Dereck Rodriguez',
 'Amed Rosario',
 'Jeff McNeil',
 'Wilmer Flores',
 'Michael Conforto',
 'Todd Frazier',
 'Brandon Nimmo',
 'Austin Jackson',
 'Kevin Plawecki',
 'Noah Syndergaard',
 'Jose Peraza',
 'Joey Votto',
 'Scooter Gennett',
 'Eugenio Suarez',
 'Mason Williams',
 'Tucker Barnhart',
 'Preston Tucker',
 'Homer Bailey',
 'Billy Hamilton',
 'Francisco Lindor',
 'Michael Brantley',
 'Jose Ramirez',
 'Edwin Encarnacion',
 'Yonder Alonso',
 'Leonys Martin',
 'Jason Kipnis',
 'Roberto Perez',
 'Brandon Guyer',
 'Joe Mauer',
 'Eddie Rosario',
 'Jorge Polanco',
 'Logan Morrison',
 'Miguel Sano',
 'Max Kepler',
 'Logan Forsythe',
 'Jake Cave',
 'Bobby Wilson',
 'Dere

In [367]:
# datetime.today() - timedelta(5)
# Distinct values of the 'pos' column.
# NOTE(review): `avails` is a local variable of create_active_player_filter
# in the visible cells; at cell level this relies on leftover kernel state.
avails.pos.unique()

array(['SS', '2B', '1B', 'LF', '3B', 'RF', 'CF', 'C', 'P', 'DH', '(R)'],
      dtype=object)

In [373]:
# Prints one name per row, appending the 'b' field when it holds 'Jr.'
# (the same rule create_active_player_filter applies).
# NOTE(review): `avails` is not defined at cell level in the visible cells.
for i, x in avails.iterrows():
    if x['b'] == 'Jr.':
        print(x['name'] + ' ' +  x['b'])  
    else:
        print(x['name'])

Amed Rosario
Jeff McNeil
Wilmer Flores
Michael Conforto
Todd Frazier
Brandon Nimmo
Austin Jackson
Kevin Plawecki
Noah Syndergaard
Jose Peraza
Joey Votto
Scooter Gennett
Eugenio Suarez
Mason Williams
Tucker Barnhart
Preston Tucker
Homer Bailey
Billy Hamilton
Francisco Lindor
Michael Brantley
Jose Ramirez
Edwin Encarnacion
Yonder Alonso
Leonys Martin
Jason Kipnis
Roberto Perez
Brandon Guyer
Joe Mauer
Eddie Rosario
Jorge Polanco
Logan Morrison
Miguel Sano
Max Kepler
Logan Forsythe
Jake Cave
Bobby Wilson
Derek Dietrich
Brian Anderson
J.T. Realmuto
Justin Bour
Starlin Castro
Martin Prado
JT Riddle
Magneuris Sierra
Wei-Yin Chen
Matt Carpenter
Yadier Molina
Jose Martinez
Marcell Ozuna
Jedd Gyorko
Paul DeJong
Harrison Bader
Yairo Munoz
Luke Weaver
Shin-Soo Choo
Rougned Odor
Elvis Andrus
Adrian Beltre
Jurickson Profar
Robinson Chirinos
Joey Gallo
Isiah Kiner-Falefa
Carlos Tocci
Dee Gordon
Jean Segura
Mitch Haniger
Nelson Cruz
Kyle Seager
Ryon Healy
Cameron Maybin
Mike Zunino
Guillermo Heredia
Y

In [33]:
# Scratch version of get_projected_players(), run at cell level.
# NOTE(review): the request below is commented out, so `rq` must still hold
# the rotogrinders page from an earlier run in this kernel session.
players = []
# rq = requests.get('https://rotogrinders.com/lineups/mlb?site=draftkings').content
souped = BeautifulSoup(rq, 'html.parser')
# ("span", {"class": "starting-lineups__team-name starting-lineups__team-name--home"})
divs = souped.findAll('div', {'class' : 'blk game'})
for div in divs:
    # Home and away block of one game
    hm = div.find('div', {'class' : 'blk home-team'})
    aw = div.find('div', {'class' : 'blk away-team'})
    
    # Pitcher name of each side
    hp = hm.find('div', {'class' : 'pitcher players'}).find('a', {'class' : 'player-popup'}).text
    ap = aw.find('div', {'class' : 'pitcher players'}).find('a', {'class' : 'player-popup'}).text
    
    # Lineup list of each side
    ht = hm.find('ul', {'class' : 'players unconfirmed'})#.findAll('li', {'class' : 'player'})
    at = aw.find('ul', {'class' : 'players unconfirmed'})#.findAll('li', {'class' : 'player'})
    
    ht = ht.findAll('li', {'class' : 'player'})
    ht = [x.find('span', {'class' : 'pname'}).text.strip() for x in ht]
    
    at = at.findAll('li', {'class' : 'player'})
    at = [x.find('span', {'class' : 'pname'}).text.strip() for x in at]
    
    # Pitchers first, then the away lineup, then the home lineup
    players.extend([hp, ap])
    players.extend(at)
    players.extend(ht)
    
#     print(hp, ap)

players

['Gio Gonzalez',
 'Anibal Sanchez',
 'Ronald Acuna',
 'Ozzie Albies',
 'Freddie Freeman',
 'Nick Markakis',
 'Kurt Suzuki',
 'Adam Duvall',
 'Johan Camargo',
 'Dansby Swanson',
 'Anibal Sanchez',
 'Adam Eaton',
 'Trea Turner',
 'Anthony Rendon',
 'Bryce Harper',
 'Juan Soto',
 'Matt Adams',
 'Daniel Murphy',
 'Matt Wieters',
 'Gio Gonzalez',
 'Corey Kluber',
 'Jose Berrios',
 'Joe Mauer',
 'Eddie Rosario',
 'Jorge Polanco',
 'Miguel Sano',
 'Max Kepler',
 'Logan Forsythe',
 'Logan Morrison',
 'Bobby Wilson',
 'Jake Cave',
 'Francisco Lindor',
 'Michael Brantley',
 'Jose Ramirez',
 'Edwin Encarnacion',
 'Yonder Alonso',
 'Melky Cabrera',
 'Jason Kipnis',
 'Yan Gomes',
 'Leonys Martin',
 'Junior Guerra',
 'Robbie Erlin',
 'Manuel Margot',
 'Eric Hosmer',
 'Hunter Renfroe',
 'Christian Villanueva',
 'Austin Hedges',
 'Freddy Galvis',
 'Franmil Reyes',
 'Cory Spangenberg',
 'Robbie Erlin',
 'Lorenzo Cain',
 'Christian Yelich',
 'Jesus Aguilar',
 'Jonathan Schoop',
 'Ryan Braun',
 'Travis S

In [252]:
# This cell held a disabled, line-by-line copy of the body of
# get_pitcher_adjustments() (request, table parsing, WHIP and MULT columns)
# plus a few inspection expressions; call the function instead.

# NOTE(review): `make` is not defined in the visible cells and looks like a
# truncated expression - confirm what this cell was meant to display.
make

Unnamed: 0,AvgPointsPerGame,Game Info,ID,MULT,NAME,Name,Name + ID,OppPitcher,OpposingTeam,PPK,Position,Roster Position,Salary,Team,TeamAbbrev,index,Adjusted
0,25.28,NYM@MIA 08/11/2018 07:10PM ET,11057780,1.199102,,Jacob deGrom,Jacob deGrom (11057780),Pablo Lopez,MIA,1.959690,SP,P,12900,MIA,NYM,131.0,30.313287
1,17.89,LAD@COL 08/11/2018 08:10PM ET,11057781,0.952605,,Clayton Kershaw,Clayton Kershaw (11057781),,COL,1.397656,SP,P,12800,COL,LAD,,17.042108
2,19.88,MIL@ATL 08/11/2018 07:10PM ET,11057787,1.011061,,Mike Foltynewicz,Mike Foltynewicz (11057787),,MIL,1.875472,SP,P,10600,MIL,ATL,,20.099885
3,18.85,CLE@CWS 08/11/2018 07:10PM ET,11057795,1.103888,,Mike Clevinger,Mike Clevinger (11057795),Reynaldo Lopez,CWS,1.984211,SP,P,9500,CWS,CLE,139.0,20.808280
4,17.33,PIT@SF 08/11/2018 09:05PM ET,11057796,0.973450,,Madison Bumgarner,Madison Bumgarner (11057796),,PIT,1.863441,SP,P,9300,PIT,SF,,16.869889
5,15.19,OAK@LAA 08/11/2018 09:07PM ET,11057797,0.977729,,Sean Manaea,Sean Manaea (11057797),Andrew Heaney,LAA,1.651087,SP,P,9200,LAA,OAK,28.0,14.851703
6,16.26,STL@KC 08/11/2018 07:15PM ET,11057799,1.215483,,Miles Mikolas,Miles Mikolas (11057799),Brad Keller,KC,1.806667,SP,P,9000,KC,STL,114.0,19.763749
7,17.04,OAK@LAA 08/11/2018 09:07PM ET,11057802,0.936561,,Andrew Heaney,Andrew Heaney (11057802),Sean Manaea,OAK,1.936364,SP,P,8800,OAK,LAA,23.0,15.959006
8,16.68,SEA@HOU 08/11/2018 07:10PM ET,11057810,0.909748,,Marco Gonzales,Marco Gonzales (11057810),,HOU,1.985714,SP,P,8400,HOU,SEA,,15.174593
9,5.72,STL@KC 08/11/2018 07:15PM ET,11057878,,,Brad Keller,Brad Keller (11057878),Miles Mikolas,STL,0.953333,SP,P,6000,,KC,24.0,


In [103]:
# Preview of the first rows of `df`.
df.head()

Unnamed: 0,Position,Name + ID,Name,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame,OpposingTeam
0,SP,Max Scherzer (11036841),Max Scherzer,11036841,P,13900,ATL@WAS 08/07/2018 07:05PM ET,WAS,26.57,ATL
1,SP,Chris Sale (11037695),Chris Sale,11037695,P,13500,BOS@TOR 08/07/2018 07:07PM ET,BOS,28.68,TOR
2,SP,James Paxton (11037696),James Paxton,11037696,P,12900,SEA@TEX 08/07/2018 08:05PM ET,SEA,22.67,TEX
3,SP,Aaron Nola (11037698),Aaron Nola,11037698,P,12400,PHI@ARI 08/07/2018 09:40PM ET,PHI,21.78,ARI
4,SP,Jacob deGrom (11037697),Jacob deGrom,11037697,P,12400,CIN@NYM 08/07/2018 07:10PM ET,NYM,24.87,CIN


In [100]:
# Preview of the first rows of `df`.
df.head()

Unnamed: 0,Position,Name + ID,Name,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame
0,SP,Max Scherzer (11036841),Max Scherzer,11036841,P,13900,ATL@WAS 08/07/2018 07:05PM ET,WAS,26.57
1,SP,Chris Sale (11037695),Chris Sale,11037695,P,13500,BOS@TOR 08/07/2018 07:07PM ET,BOS,28.68
2,SP,James Paxton (11037696),James Paxton,11037696,P,12900,SEA@TEX 08/07/2018 08:05PM ET,SEA,22.67
3,SP,Aaron Nola (11037698),Aaron Nola,11037698,P,12400,PHI@ARI 08/07/2018 09:40PM ET,PHI,21.78
4,SP,Jacob deGrom (11037697),Jacob deGrom,11037697,P,12400,CIN@NYM 08/07/2018 07:10PM ET,NYM,24.87


In [124]:
# Displays the scraped list of projected players (performs a live request).
get_projected_players()
# hm.find('ul').findAll('li', {'class' : 'player'})

['Kyle Hendricks',
 'Jeremy Hellickson',
 'Adam Eaton',
 'Trea Turner',
 'Anthony Rendon',
 'Bryce Harper',
 'Juan Soto',
 'Ryan Zimmerman',
 'Daniel Murphy',
 'Spencer Kieboom',
 'Jeremy Hellickson',
 'Anthony Rizzo',
 'Javier Baez',
 'Ben Zobrist',
 'Jason Heyward',
 'David Bote',
 'Kyle Schwarber',
 'Willson Contreras',
 'Kyle Hendricks',
 'Addison Russell',
 'Masahiro Tanaka',
 'Mike Minor',
 'Shin-Soo Choo',
 'Rougned Odor',
 'Elvis Andrus',
 'Adrian Beltre',
 'Jurickson Profar',
 'Ronald Guzman',
 'Willie Calhoun',
 'Isiah Kiner-Falefa',
 'Drew Robinson',
 'Aaron Hicks',
 'Giancarlo Stanton',
 'Gleyber Torres',
 'Didi Gregorius',
 'Miguel Andujar',
 'Luke Voit',
 'Brett Gardner',
 'Austin Romine',
 'Shane Robinson',
 'Dylan Bundy',
 'Nathan Eovaldi',
 'Mookie Betts',
 'Andrew Benintendi',
 'Mitch Moreland',
 'J.D. Martinez',
 'Xander Bogaerts',
 'Rafael Devers',
 'Brock Holt',
 'Daniel Butler',
 'Jackie Bradley',
 'Jonathan Villar',
 'Tim Beckham',
 'Adam Jones',
 'Mark Trumbo',


In [155]:
# df = prep_dataframe()

# pd.merge([df, df[df['Position'] == 'SP']['N']]how='left')


Unnamed: 0,Position,Name + ID,Name,ID,Roster Position,Salary,Game Info,TeamAbbrev,AvgPointsPerGame,OpposingTeam,PPK,index,OppPitcher
0,SP,Gerrit Cole (11047795),Gerrit Cole,11047795,P,12600,SEA@HOU 08/09/2018 08:10PM ET,HOU,26.16,SEA,2.076190,67,Mike Leake
1,SP,Blake Snell (11047418),Blake Snell,11047418,P,11500,BAL@TB 08/09/2018 07:10PM ET,TB,21.76,BAL,1.892174,29,Dylan Bundy
2,SP,Masahiro Tanaka (11047798),Masahiro Tanaka,11047798,P,10300,TEX@NYY 08/09/2018 07:05PM ET,NYY,18.65,TEX,1.810680,38,Mike Minor
3,SP,Jon Gray (11047804),Jon Gray,11047804,P,8800,LAD@COL 08/09/2018 08:40PM ET,COL,17.80,LAD,2.022727,24,Kenta Maeda
4,SP,Kenta Maeda (11047812),Kenta Maeda,11047812,P,8300,LAD@COL 08/09/2018 08:40PM ET,LAD,15.52,COL,1.869880,17,Jon Gray
5,SP,Nathan Eovaldi (11047816),Nathan Eovaldi,11047816,P,7700,BOS@TOR 08/09/2018 07:07PM ET,BOS,16.38,TOR,2.127273,74,Marco Estrada
6,SP,Dylan Bundy (11047817),Dylan Bundy,11047817,P,7600,BAL@TB 08/09/2018 07:10PM ET,BAL,16.58,TB,2.181579,5,Blake Snell
7,SP,Mike Minor (11047824),Mike Minor,11047824,P,7100,TEX@NYY 08/09/2018 07:05PM ET,TEX,13.75,NYY,1.936620,8,Masahiro Tanaka
8,SP,Derek Holland (11047836),Derek Holland,11047836,P,6400,PIT@SF 08/09/2018 10:15PM ET,SF,12.83,PIT,2.004688,338,Clay Holmes
9,OF,Mookie Betts (11047429),Mookie Betts,11047429,OF,5900,BOS@TOR 08/09/2018 07:07PM ET,BOS,12.33,TOR,2.089831,74,Marco Estrada


In [182]:
# Scrape the runs-per-game table from teamrankings.com into a dataframe.
rq = requests.get('https://www.teamrankings.com/mlb/stat/runs-per-game').content
souped = BeautifulSoup(rq, 'html.parser')
table = souped.find('table', {'class' : 'tr-table datatable scrollable'})
rows = table.findAll('tr')
# The first row carries the <th> column names, later rows the <td> values.
header = [x.text for x in rows[0].findAll('th')]
content = []
for row in rows[1:]:
    content.append([x.text for x in row.findAll('td')])
    
tmdf = pd.DataFrame(content, columns = header)
# NOTE(review): team_dict is not defined in the visible cells - presumably it
# comes from `from team_maker import *` and maps the site's team names to
# abbreviations; confirm.
tmdf ['Team'] = [team_dict[x] for x in tmdf.Team]
tmdf['2018'] = pd.to_numeric(tmdf['2018'])
# MOD = mean of the '2018' column divided by the team's own value, so teams
# above the mean get a MOD below 1.
tmdf['MOD'] = tmdf['2018'].mean() / tmdf['2018']
tmdf[['Team', 'MOD']]
# tmdf.iloc[:, 1:3]
# souped

Unnamed: 0,Team,MOD
0,BOS,0.826407
1,NYY,0.861573
2,CLE,0.871689
3,HOU,0.898051
4,CHC,0.898051
5,TEX,0.929923
6,ATL,0.935784
7,LAD,0.937754
8,OAK,0.943715
9,COL,0.949751


In [198]:
# Build the prepared player table and display it.
# NOTE(review): the displayed output has columns (e.g. MULT_y, OppPitcher,
# Adjusted) that the prep_dataframe defined in this notebook does not create,
# so a different version of the function produced it - confirm which one.
ddf = prep_dataframe()
ddf
# ddf.MULT_y.unique()
# ddf[['OpposingTeam', 'MULT_y']]

# ddf[ddf['OpposingTeam'] == 'NYY']

Unnamed: 0,AvgPointsPerGame,Game Info,ID,MULT_y,NAME,Name,Name + ID,OppPitcher,OpposingTeam,PPK,Position,Roster Position,Salary,Team,TeamAbbrev,index,Adjusted
0,26.16,SEA@HOU 08/09/2018 08:10PM ET,11047795,1.060556,,Gerrit Cole,Gerrit Cole (11047795),Mike Leake,SEA,2.076190,SP,P,12600,SEA,HOU,67,27.744133
1,21.76,BAL@TB 08/09/2018 07:10PM ET,11047418,1.139216,,Blake Snell,Blake Snell (11047418),Dylan Bundy,BAL,1.892174,SP,P,11500,BAL,TB,29,24.789333
2,18.65,TEX@NYY 08/09/2018 07:05PM ET,11047798,0.929923,,Masahiro Tanaka,Masahiro Tanaka (11047798),Mike Minor,TEX,1.810680,SP,P,10300,TEX,NYY,38,17.343072
3,17.80,LAD@COL 08/09/2018 08:40PM ET,11047804,0.937754,,Jon Gray,Jon Gray (11047804),Kenta Maeda,LAD,2.022727,SP,P,8800,LAD,COL,24,16.692028
4,15.52,LAD@COL 08/09/2018 08:40PM ET,11047812,0.949751,,Kenta Maeda,Kenta Maeda (11047812),Jon Gray,COL,1.869880,SP,P,8300,COL,LAD,17,14.740139
5,16.38,BOS@TOR 08/09/2018 07:07PM ET,11047816,0.978974,,Nathan Eovaldi,Nathan Eovaldi (11047816),Marco Estrada,TOR,2.127273,SP,P,7700,TOR,BOS,74,16.035600
6,16.58,BAL@TB 08/09/2018 07:10PM ET,11047817,1.058036,,Dylan Bundy,Dylan Bundy (11047817),Blake Snell,TB,2.181579,SP,P,7600,TB,BAL,5,17.542244
7,13.75,TEX@NYY 08/09/2018 07:05PM ET,11047824,0.861573,,Mike Minor,Mike Minor (11047824),Masahiro Tanaka,NYY,1.936620,SP,P,7100,NYY,TEX,8,11.846631
8,12.83,PIT@SF 08/09/2018 10:15PM ET,11047836,0.981131,,Derek Holland,Derek Holland (11047836),Clay Holmes,PIT,2.004688,SP,P,6400,PIT,SF,338,12.587907
9,12.10,SEA@HOU 08/09/2018 08:10PM ET,11047851,0.898051,,Mike Leake,Mike Leake (11047851),Gerrit Cole,HOU,2.122807,SP,P,5700,HOU,SEA,2,10.866418


In [168]:
# Distinct team abbreviations present in `df`.
df.TeamAbbrev.unique()

array(['HOU', 'TB', 'NYY', 'COL', 'LAD', 'BOS', 'BAL', 'TEX', 'SF', 'SEA',
       'TOR', 'PIT'], dtype=object)

In [15]:
# Build one team for the configured roster slots, export it and display it.
# NOTE(review): make_n_teams and export_team are not defined in the visible
# cells - presumably they come from `from team_maker import *`; confirm.
teams = make_n_teams(1, positions)
export_team(teams)
teams

Extracting csv
Filtering by projected players
Expanding dataframe
Getting pitcher and team score information
swaps: 5
swaps: 6
Avg points (adj): 123.09943840652352
Avg points: 89.12
Salary: 50000
Salary: 47300
Avg Points: 91.16
Avg w/o cap: 91.84
Sal w/o cap: 51400
ratchet


[                 AvgPointsPerGame                      Game Info        ID  \
 Roster Position                                                              
 C                            5.07  OAK@MIN 08/23/2018 08:10PM ET  11121044   
 1B                           7.22  OAK@MIN 08/23/2018 08:10PM ET  11120930   
 2B                           7.92  OAK@MIN 08/23/2018 08:10PM ET  11120916   
 SS                           7.56  OAK@MIN 08/23/2018 08:10PM ET  11120921   
 3B                           8.12  OAK@MIN 08/23/2018 08:10PM ET  11120922   
 OF                           9.66  OAK@MIN 08/23/2018 08:10PM ET  11120913   
 OF                           5.72    KC@TB 08/23/2018 07:10PM ET  11121007   
 OF                           4.46    KC@TB 08/23/2018 07:10PM ET  11121057   
 P                           15.48  ATL@MIA 08/23/2018 07:10PM ET  11120909   
 P                           17.91  OAK@MIN 08/23/2018 08:10PM ET  11120906   
 
                      MULT           NAME         

Unnamed: 0,P,P.1,C,1B,2B,3B,SS,OF,OF.1,OF.2
0,11057169,11057171,11057596,11057549,11057462,11057193,11057472,11057236,11057563,11057654
0,11057170,11057172,11057303,11057359,11057448,11057538,11057192,11057487,11057507,11057458
0,11057791,11057183,11057287,11057228,11057242,11057349,11057328,11057319,11057237,11057340


In [249]:
# Summary statistics of the pitcher multipliers (performs a live request).
get_pitcher_adjustments().describe()

Unnamed: 0,MULT
count,603.0
mean,1.0
std,0.293691
min,0.23619
25%,0.832518
50%,0.945883
75%,1.087365
max,3.320969


In [16]:
# The trailing backslashes continue the string literal, so `url` is a single
# line without embedded newlines.
# NOTE(review): every option after '#' is part of the URL fragment, which is
# normally not sent to the server - the response is likely just the base
# page, independent of these options; confirm and look for the data endpoint.
url = "http://mlb.mlb.com/stats/sortable.jsp#elem=%5Bobject+Object%5D&tab_level=child&\
click_text=Sortable+Player+hitting&game_type='R'&season=2018&season_type=ANY&\
league_code='MLB'&sectionType=sp&statType=hitting&page=1&ts=1535065079547&\
playerType=QUALIFIER&sportCode='mlb'&split=&team_id=&active_sw=&position=&\
page_type=SortablePlayer&sortOrder='desc'&sortColumn=avg&results=&perPage=50&\
timeframe=d1&last_x_days=1&extended=0"

requests.get(url).content

b'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\t\t\t\n\t\n\t\n\n\n \t\n\t\t\n\t\t\n\t\n\t\n\n\n\n\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n    \n\n    \n\n    \n    \n    \n    \n    \n    \n\n\n\n\n    \n    \n    \n    \n\n\n\n\n\n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n\n    \n    \n    \n\n\n\n\n\n\n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n    \n\n\n\n\n\n    \n    \n\n\n\n\n<!DOCTYPE html>\n\n<!--[if lt IE 7 ]> <html class="ie6 lt-ie9 " lang="en" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" xmlns:fb="http://www.facebook.com/2008/fbml"> <![endif]-->\n\n<!--[if IE 7 ]> <html class="ie7 lt-ie9 " lang="en" xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" xmlns:fb="http://www.facebook.com/2008/fbml"> <![endif]-->\n\n<!--[if IE 8 ]> <html class="ie8 lt-ie9 " lang="