In [None]:
import os
import sys
nb_dir = os.path.split(os.getcwd())[0]
if nb_dir not in sys.path:
    sys.path.append(nb_dir)
import requests
from bs4 import BeautifulSoup
import re
import lxml.html as lh
import pandas as pd
import calendar
import datetime

In [None]:
# Sample start date - intended to be a Monday, because `header` below labels
# the seven scored days Mon..Sun in that order.
start_date = "09/16/19"
date_1 = datetime.datetime.strptime(start_date, "%m/%d/%y")

# Define week for scores to be calculated: seven consecutive days starting at
# `date_1`, each formatted as "<month abbreviation> <day>" (e.g. "Sep 16").
span = []
for offset in range(7):
    current_day = date_1 + datetime.timedelta(days=offset)
    span.append(calendar.month_abbr[current_day.month] + " " + str(current_day.day))

# One row per player is appended to `scoresheet`; `header` names its columns.
scoresheet = []
header = ['Players', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun', 'Total']

# Base URLs for future use. The season year is taken from the start date so
# the game-log URLs cannot drift out of sync with the scored week.
season_year = str(date_1.year)
baseball_reference_url = 'https://www.baseball-reference.com'
gamelog_url_first = 'https://www.baseball-reference.com/players/gl.fcgi?id='
pitcher_url_second = '&t=p&year=' + season_year
batter_url_second = '&t=b&year=' + season_year

# Sample Team. Player IDs copied manually from site.
# Each value is (role, display name, days to score), where role is
# "PP" (position player / hitter), "SP" or "RP" (pitchers).
team = {
    'realmjt01': ("PP", "JT Realmuto, PHI", span),
    'garvemi01': ("PP", "Mitch Garver, MIN", span),
    'cabremi01': ("PP", "Miguel Cabrera, DET", span),
    'voitlu01': ("PP", "Luke Voit, NYY", span),
    'albieoz01': ("PP", "Ozzie Albies, ATL", span),
    'odorro01': ("PP", "Rougned Odor, TEX", span),
    'rendoan01': ("PP", "Anthony Rendon, WAS", span),
    'doziehu01': ("PP", "Hunter Dozier, KC", span),
    'storytr01': ("PP", "Trevor Story, COL", span),
    'correca01': ("PP", "Carlos Correa, HOU", span),
    'bettsmo01': ("PP", "Mookie Betts, BOS", span),
    'martijd02': ("PP", "JD Martinez, BOS", span),
    'rosared01': ("PP", "Eddie Rosario, MIN", span),
    'renfrhu01': ("PP", "Hunter Renfroe, SD", span),
    'mancitr01': ("PP", "Trey Mancini, BAL", span),
    'meadoau01': ("PP", "Austin Meadows, TB", span),
    'scherma01': ("SP", "Max Scherzer, WAS", span),
    'verlaju01': ("SP", "Justin Verlander, HOU", span),
    'colege01': ("SP", "Gerrit Cole, HOU", span),
    'severlu01': ("SP", "Luis Severino, NYY", span),
    'diazed04': ("RP", "Edwin Diaz, NYM", span),  # name typo ("Ediwn") fixed
    'kenneia01': ("RP", "Ian Kennedy, KC", span),
}

def _fetch_table(url, table_id):
    """Download ``url`` and return the ``<table>`` element whose id is ``table_id``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the page does not contain the requested table.
    """
    # A timeout keeps a stalled connection from hanging the whole notebook.
    page = requests.get(url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")
    table = soup.find("table", {"id": table_id})
    if table is None:
        raise RuntimeError("Table '" + table_id + "' not found at " + url)
    return table


def _parse_gamelog(table):
    """Return ``(rows, df)`` for a game-log table.

    ``rows`` are the ``<tr>`` elements kept from the table body and ``df`` is a
    DataFrame of their cell texts; ``df`` row ``i`` corresponds to ``rows[i]``.
    """
    col_names = [th.text for th in table.thead.tr.find_all("th")]
    num_cols = len(table.tbody.find("tr").contents)

    # Remove monthly total rows: keep only rows with the same number of cells
    # as the first body row. Built as a new list because removing items from a
    # list while iterating over it skips the element after each removal.
    rows = [row for row in table.tbody.find_all("tr") if len(row) == num_cols]

    data = [[cell.text for cell in row.contents] for row in rows]
    df = pd.DataFrame(data, columns=col_names)

    # Replace HTML non-breaking spaces with plain spaces in the Date column so
    # doubleheader dates such as "Sep 16 (1)" can be matched exactly.
    df['Date'] = df['Date'].str.replace(u'\xa0', ' ', regex=False)
    return rows, df


def _games_on(df, day):
    """Return the game-log rows for ``day``, including doubleheader games.

    Doubleheader games appear in the Date column as "<day> (1)" / "<day> (2)".
    """
    return df.loc[df['Date'].isin([day, day + " (1)", day + " (2)"])]


def _stat(line, column):
    """Return the integer in ``line[column]``; a blank cell counts as 0.

    The whole cell text is parsed (not just its first character), so values
    of 10 or more are read correctly.
    """
    text = str(line[column]).strip()
    return int(text) if text else 0


def _count_grand_slams(endpoint):
    """Count bases-loaded home runs on the plate-appearance page at ``endpoint``."""
    table = _fetch_table(baseball_reference_url + endpoint, "batting_events")
    slams = 0
    for pa in table.tbody.find_all("tr"):
        runners_cell = pa.find("td", {"data-stat": "runners_on_bases_pbp"})
        play_cell = pa.find("td", {"data-stat": "play_desc"})
        if runners_cell is None or play_cell is None:
            continue  # row without the expected cells; nothing to count
        if "Home Run" in play_cell.text and runners_cell.text == "123":
            slams += 1
    return slams


def _batter_game_points(line, pa_endpoint):
    """Return a hitter's points for one game.

    Points = total bases + 2 * (RBI + R + SB) + cycle bonus (50)
    + perfect-day bonus (4-for-4: 5, 5-for-5: 10, 6-for-6: 15, 7-for-7: 20)
    + 6 per grand slam. ``pa_endpoint`` is the site-relative URL of the game's
    plate-appearance log; it is only fetched when a grand slam is possible.
    """
    rbis = _stat(line, 'RBI')
    runs = _stat(line, 'R')
    stolen_bases = _stat(line, 'SB')
    hits = _stat(line, 'H')
    doubles = _stat(line, '2B')
    triples = _stat(line, '3B')
    homers = _stat(line, 'HR')
    at_bats = _stat(line, 'AB')
    tb = hits + doubles + 2 * triples + 3 * homers

    # A grand slam needs a homer and at least 4 RBI, so only then is the
    # extra page request worth making.
    slam_count = 0
    if homers >= 1 and rbis >= 4:
        slam_count = _count_grand_slams(pa_endpoint)

    # Cycle: a double, a triple, a homer and at least one more hit (a single).
    hit_for_cycle = (doubles >= 1 and triples >= 1 and homers >= 1
                     and hits > (doubles + triples + homers))
    cycle = 50 if hit_for_cycle else 0

    # Bonus for a hit in every at-bat, with 4 to 7 at-bats.
    x4x = {4: 5, 5: 10, 6: 15, 7: 20}.get(hits, 0) if at_bats == hits else 0

    return tb + 2 * (rbis + runs + stolen_bases) + cycle + x4x + slam_count * 6


def _sp_game_points(line):
    """Return a starting pitcher's points for one game.

    Points = whole innings pitched + strikeouts + 5 for a win + a bonus:
    100 for a 9+ inning shutout with no hits, walks, hit batters or batters
    reaching on error; 50 for any other 9+ inning hitless SHO/CG; 15 for a
    shutout; 5 for a complete game.
    """
    dec = 5 if line['Dec'].startswith("W") else 0
    # Only whole innings count here; the fractional part of IP is dropped.
    innings = int(float(line['IP']))
    inngs = line['Inngs']

    if inngs in ("SHO", "CG") and _stat(line, 'H') == 0 and innings >= 9:
        flawless = inngs == "SHO" and all(
            _stat(line, column) == 0 for column in ('BB', 'HBP', 'ROE'))
        spec = 100 if flawless else 50
    elif inngs == "SHO":
        spec = 15
    elif inngs == "CG":
        spec = 5
    else:
        spec = 0

    return innings + _stat(line, 'SO') + dec + spec


def _rp_game_points(line):
    """Return a relief pitcher's points for one game.

    Points = outs recorded + strikeouts + 5 for a win or 3 for a save.
    """
    decision = line['Dec']
    # NOTE(review): only the first character of Dec is inspected, so a value
    # starting with another letter (presumably e.g. "BW") earns nothing -
    # confirm this matches the league rules.
    if decision.startswith("W"):
        dec = 5
    elif decision.startswith("S"):
        dec = 3
    else:
        dec = 0

    # IP is "<whole innings>.<extra outs>"; split on the dot rather than
    # indexing characters so "10.1" or a value without a dot also parse.
    whole, _, extra = line['IP'].partition('.')
    outs = 3 * int(whole) + (int(extra) if extra else 0)
    return outs + _stat(line, 'SO') + dec


def _weekly_row(name, daily):
    """Return ``[name, <daily scores...>, total]``; non-int entries ('-') are not summed."""
    total = sum(score for score in daily if isinstance(score, int))
    return [name] + daily + [total]


for key, info in team.items():
    role, name, days = info[0], info[1], info[-1]
    is_hitter = role == 'PP'

    # Get player game log table
    url = gamelog_url_first + key + (batter_url_second if is_hitter else pitcher_url_second)
    print(url)
    table = _fetch_table(url, "batting_gamelogs" if is_hitter else "pitching_gamelogs")
    rows, df = _parse_gamelog(table)

    # If a hitter, keep the link to each game's plate-appearance log
    # (needed to detect grand slams). logs[i] belongs to df row i.
    logs = []
    if is_hitter:
        logs = [row.find("td", {"data-stat": "PA"}).get("data-endpoint") for row in rows]

    daily = []
    for day in days:
        games = _games_on(df, day)
        if games.empty:
            daily.append('-')  # player did not play that day
            continue

        # Sum every game of the day so a doubleheader yields one daily score,
        # whichever of its games the player appeared in.
        points = 0
        for index, line in games.iterrows():
            if is_hitter:
                points += _batter_game_points(line, logs[index])
            elif role == 'SP':
                points += _sp_game_points(line)
            else:
                points += _rp_game_points(line)
        daily.append(points)

    # Calculate weekly total for player
    scoresheet.append(_weekly_row(name, daily))

In [None]:
def totals(col):
    """Return the sum of the integer entries in ``col``.

    Entries that are not ``int`` instances (for example the '-' placeholders
    or player names) are skipped, so a column without integers sums to 0.
    """
    return sum(entry for entry in col if isinstance(entry, int))

# Build the weekly scoresheet: one row per player, one column per `header` entry.
# Dashes represent days the player did not play.
week = pd.DataFrame(list(scoresheet), columns=header)

# Append a 'Total' row holding the per-column sum of the integer scores.
column_totals = week.apply(totals, axis=0)
week.loc['Total'] = column_totals
week

In [None]:
# Change to the user's path to download a CSV of the scores
# week.to_csv('C:/Users/', sep=',')