# Data Collection

## Import Libraries

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time

## Import Data for Player Names and IDs

In [2]:
# Load the SFBB player ID map; the path is relative to the notebook's working directory.
player_id = pd.read_csv('../SFBB Player ID Map - PLAYERIDMAP.csv')

In [3]:
# Preview the first rows to confirm the file loaded and to see which columns are available.
player_id.head()

Unnamed: 0,IDPLAYER,PLAYERNAME,BIRTHDATE,FIRSTNAME,LASTNAME,TEAM,LG,POS,IDFANGRAPHS,FANGRAPHSNAME,...,DRAFTKINGSNAME,OTTONEUID,HQID,RAZZBALLNAME,FANTRAXID,FANTRAXNAME,ROTOWIRENAME,ALLPOS,NFBCLASTFIRST,ACTIVE
0,aardsda01,David Aardsma,12/27/1981,David,Aardsma,,,P,1902,David Aardsma,...,David Aardsma,4362.0,,David Aardsma,,,David Aardsma,P,"Aardsma, David",N
1,abadfe01,Fernando Abad,12/17/1985,Fernando,Abad,BAL,AL,P,4994,Fernando Abad,...,Fernando Abad,7372.0,3556.0,Fernando Abad,*01viz*,Fernando Abad,Fernando Abad,P,"Abad, Fernando",Y
2,abbotco01,Cory Abbott,9/20/1995,Cory,Abbott,CHC,NL,P,sa3005305,Cory Abbott,...,,,6286.0,Cory Abbott,*04ef6*,Cory Abbott,Cory Abbott,P,"Abbott, Cory",Y
3,abramcj01,CJ Abrams,10/3/2000,CJ,Abrams,SD,NL,SS,sa3010152,CJ Abrams,...,,,,CJ Abrams,*04qk8*,CJ Abrams,CJ Abrams,SS,"Abrams, CJ",Y
4,abreual01,Albert Abreu,9/26/1995,Albert,Abreu,NYY,AL,P,17485,Albert Abreu,...,,,5762.0,Albert Abreu,*03xy4*,Albert Abreu,Albert Abreu,P,"Abreu, Albert",Y


### Specify which Columns and Rows Needed

In [4]:
# Keep only the MLB id, the name parts, the position and the activity flag.
wanted_columns = ['MLBID', 'FIRSTNAME', 'LASTNAME', 'POS', 'ACTIVE']
mlb_id = player_id[wanted_columns]

#### Only Want Active, Non-Pitcher Players

In [5]:
# Rows whose ACTIVE flag is 'Y' are the currently active players.
is_active = mlb_id['ACTIVE'] == 'Y'
active = mlb_id[is_active]

In [6]:
# Exclude every row whose position is exactly 'P' (pitchers).
not_pitcher = active['POS'].ne('P')
active = active[not_pitcher]

In [7]:
# Spot-check the filtered frame: only active, non-pitcher rows should remain.
active.head()

Unnamed: 0,MLBID,FIRSTNAME,LASTNAME,POS,ACTIVE
3,682928.0,CJ,Abrams,SS,Y
7,547989.0,Jose,Abreu,1B,Y
12,554429.0,Dustin,Ackley,1B,Y
13,660670.0,Ronald,Acuna,OF,Y
14,542436.0,Cristhian,Adames,2B,Y


In [8]:
# Count missing values per column; MLBID must be complete before it can be cast to int.
active.isnull().sum()

MLBID        1
FIRSTNAME    0
LASTNAME     0
POS          0
ACTIVE       0
dtype: int64

In [9]:
# Drop every row that has a missing value in any of the selected columns.
active = active.dropna()

#### Change ID to Integer

In [10]:
# MLBID was read as float because the column contained a missing value; cast it to int
# so it formats as a plain number. `assign` builds a new frame instead of writing a
# column into a frame derived from filtered slices, which can raise SettingWithCopyWarning.
active = active.assign(MLBID=active['MLBID'].astype(int))

#### Save DataFrame with IDs and Names

In [11]:
# Persist the id/name lookup one directory above the notebook. `index=False` is not
# passed, so the DataFrame index is written as the first (unnamed) column as well.
active.to_csv('../mlb_players.csv')

## Function for Collecting Stats from Baseballsavant.mlb.com

In [12]:
def get_stats(mlbid, first_name, last_name, season=2021,
              output_dir='./datasets', timeout=30, delay=1):
    """
    Download one player's hitting game log from Baseball Savant and save it as a CSV.

    The player's game-log page is requested, the ``gamelogs-mlb`` table is parsed
    into one record per table row, and the records are written to
    ``{output_dir}/{first_name}-{last_name}-{mlbid}.csv``. If the table is missing,
    or a row has fewer cells than expected, a message is printed and no file is
    written.

    Parameters
    ----------
    mlbid : int
        MLB id of the player; appended to the player slug in the URL.
    first_name, last_name : str
        Player name; lower-cased for the URL, used as given in the file name.
    season : int or str, default 2021
        Season passed in the ``season`` query parameter.
    output_dir : str, default './datasets'
        Directory the CSV is written to. It is not created by this function.
    timeout : float, default 30
        Seconds to wait for the HTTP response before ``requests`` gives up.
    delay : float, default 1
        Seconds to pause after a page has been processed, whether or not stats
        were found, to prevent too many requests.

    Returns
    -------
    None
        The result is the CSV file written to disk.

    Raises
    ------
    Exception
        If the page responds with a status code other than 200. Errors raised by
        ``requests`` itself (for example on timeout) are not caught here.
    """

    # Output column -> index of the <td> holding that value within a table row.
    columns = {
        'date': 0,
        'PA': 3,
        'AB': 4,
        'R': 5,
        'H': 6,
        '2B': 7,
        '3B': 8,
        'HR': 9,
        'RBI': 10,
        'BB': 11,
        'SO': 12,
        'AVG': 16,
        'OBP': 17,
        'SLG': 18,
        'OPS': 19,
    }

    #This is the url to the website
    base_url = 'https://baseballsavant.mlb.com/savant-player/'

    #This string will be used to specify the player: "first-last" followed directly by the id
    player_name = first_name.lower() + '-' + last_name.lower() + str(mlbid)

    #Url for the page with the stats
    url = f'{base_url}{player_name}?stats=gamelogs-r-hitting-mlb&season={season}'

    #Requests for the page; the timeout keeps one stalled connection from hanging the whole run
    res = requests.get(url, timeout=timeout)

    if res.status_code != 200:
        #Modified from https://pypi.org/project/ratelimit/
        raise Exception('API response: {}'.format(res.status_code))

    soup = BeautifulSoup(res.content, 'lxml')

    try:
        #Find the table with desired stats
        table = soup.find('div', {'id': ['gamelogs-mlb']})

        player_stats = []
        for row in table.find('tbody').find_all('tr'):
            td_tags = row.find_all('td')

            if not td_tags:
                # A row without data cells has no stats; skipping it avoids
                # re-appending the previous row's values.
                continue

            #Builds the record once per row and appends it to the list
            player_stats.append(
                {name: td_tags[position].text.strip()
                 for name, position in columns.items()}
            )

    except (AttributeError, IndexError):
        # Minor league players have different table name. Shohei is a two way player.
        print(f'{first_name} {last_name}: Minor League Player or Shohei Ohtani!')

    else:
        #Creates data frame of all stats and saves it to a file with player name
        df = pd.DataFrame(player_stats)
        df.to_csv(f'{output_dir}/{first_name}-{last_name}-{mlbid}.csv')

    finally:
        # Pause after every processed page, including the ones without stats, so
        # consecutive failures do not turn into a burst of requests.
        #inspired from https://realpython.com/python-sleep/
        if delay:
            time.sleep(delay)


## Get Stats for All Active Non-Pitcher Players

In [13]:
%%time
# Fetch and save the game log of every active non-pitcher. Only the three columns
# get_stats needs are selected, and itertuples avoids building a Series per row.
for player in active[['MLBID', 'FIRSTNAME', 'LASTNAME']].itertuples(index=False):
    get_stats(player.MLBID, player.FIRSTNAME, player.LASTNAME)
    
# Copied from https://stackoverflow.com/questions/16476924/how-to-iterate-over-rows-in-a-dataframe-in-pandas

CJ Abrams: Minor League Player or Shohei Ohtani!
J.J. Bleday: Minor League Player or Shohei Ohtani!
Zack Cox: Minor League Player or Shohei Ohtani!
Yusniel Diaz: Minor League Player or Shohei Ohtani!
Jeter Downs: Minor League Player or Shohei Ohtani!
Lucius Fox: Minor League Player or Shohei Ohtani!
Tyler Freeman: Minor League Player or Shohei Ohtani!
Anthony Garcia: Minor League Player or Shohei Ohtani!
Riley Greene: Minor League Player or Shohei Ohtani!
Reese Havens: Minor League Player or Shohei Ohtani!
Kyle Holder: Minor League Player or Shohei Ohtani!
Ryan Howard: Minor League Player or Shohei Ohtani!
James Jones: Minor League Player or Shohei Ohtani!
Nolan Jones: Minor League Player or Shohei Ohtani!
Josh Jung: Minor League Player or Shohei Ohtani!
Royce Lewis: Minor League Player or Shohei Ohtani!
Kevin Maitan: Minor League Player or Shohei Ohtani!
Austin Martin: Minor League Player or Shohei Ohtani!
Ernesto Mejia: Minor League Player or Shohei Ohtani!
Victor Victor Mesa: Minor 