In [2]:
import pandas as pd
import requests
import re
from bs4 import BeautifulSoup

In [3]:
url = "http://www.espn.com/mlb/freeagents/_/year/2017/type/dollars"
response = requests.get(url)
page = response.text

In [4]:
soup = BeautifulSoup(page,"html5lib")

In [5]:
#Players are both in class oddrow and even row
#Using regex to grab both at once
rowplayer_regex = re.compile('.*row\splayer.*')

In [6]:
col_name_all = soup.find_all(class_="colhead")[1]
col_names_list=[]
for name in col_name_all.find_all("td"):
    col_names_list.append(name.text)

## Now Lets Build a List of 2017 MLB Free Agents

In [7]:
free_agents_list = soup.find_all(class_=rowplayer_regex)

In [8]:
free_agents_17 = {} #empty dict to add player and his data to
for row in free_agents_list:
    items = row.find_all('td')
    player = items[0].find('a').text
    free_agents_17[player] = [i.text for i in items[1:]]

In [9]:
free_agents_17df = pd.DataFrame.from_dict\
(data=free_agents_17,orient='index')
free_agents_17df.reset_index(inplace=True)
#reset index because player name shouldn't be index
#We'll need it to join on

In [10]:
free_agents_17df.head()

Unnamed: 0,index,0,1,2,3,4,5,6,7
0,Eric Hosmer,1B,28,Signed,Royals,Padres,8,7,"$144,000,000"
1,Yu Darvish,SP,31,Signed,Dodgers,Cubs,6,1,"$126,000,000"
2,J.D. Martinez,RF,30,Signed,Diamondbacks,Red Sox,5,6,"$110,000,000"
3,Lorenzo Cain,CF,32,Signed,Royals,Brewers,5,2,"$80,000,000"
4,Jake Arrieta,SP,32,Signed,Cubs,Phillies,3,4,"$75,000,000"


In [11]:
#Rename Columns to match ESPN's
col_names_dict = {"index":"PLAYER",0:"POS",1:"AGE",2:"STATUS",\
                 3:"OLD_TEAM",4:"NEW_TEAM",5:"YRS",6:"RK"\
                  ,7:"DOLLARS"}

In [12]:
free_agents_17df.rename(columns=col_names_dict,inplace=True)

In [13]:
#Last Step - Add Free Agent Year as Column
free_agents_17df["PLAYER_YEAR"]=free_agents_17df["PLAYER"] + " 2017"

In [14]:
free_agents_17df.head()

Unnamed: 0,PLAYER,POS,AGE,STATUS,OLD_TEAM,NEW_TEAM,YRS,RK,DOLLARS,PLAYER_YEAR
0,Eric Hosmer,1B,28,Signed,Royals,Padres,8,7,"$144,000,000",Eric Hosmer 2017
1,Yu Darvish,SP,31,Signed,Dodgers,Cubs,6,1,"$126,000,000",Yu Darvish 2017
2,J.D. Martinez,RF,30,Signed,Diamondbacks,Red Sox,5,6,"$110,000,000",J.D. Martinez 2017
3,Lorenzo Cain,CF,32,Signed,Royals,Brewers,5,2,"$80,000,000",Lorenzo Cain 2017
4,Jake Arrieta,SP,32,Signed,Cubs,Phillies,3,4,"$75,000,000",Jake Arrieta 2017


In [52]:
#Clean Up DOLLARS column from dashes, commas and $ signs

free_agents_17df["DOLLARS"] = free_agents_17df["DOLLARS"].str.replace(r'\D','0')

free_agents_17df.head()

Unnamed: 0,PLAYER,POS,AGE,STATUS,OLD_TEAM,NEW_TEAM,YRS,RK,DOLLARS,FA_YEAR
0,Eric Hosmer,1B,28,Signed,Royals,Padres,8,7,14400000000,2017
1,Yu Darvish,SP,31,Signed,Dodgers,Cubs,6,1,12600000000,2017
2,J.D. Martinez,RF,30,Signed,Diamondbacks,Red Sox,5,6,11000000000,2017
3,Lorenzo Cain,CF,32,Signed,Royals,Brewers,5,2,8000000000,2017
4,Jake Arrieta,SP,32,Signed,Cubs,Phillies,3,4,7500000000,2017


In [54]:
#Convert Numeric columns to numbers

num_cols = ["DOLLARS","AGE","YRS","FA_YEAR"]
for col in num_cols:
    free_agents_17df[col]=\
    pd.to_numeric(free_agents_17df[col])

In [55]:
free_agents_17df.dtypes

PLAYER       object
POS          object
AGE           int64
STATUS       object
OLD_TEAM     object
NEW_TEAM     object
YRS         float64
RK           object
DOLLARS       int64
FA_YEAR       int64
dtype: object

## ESPN FREE AGENT DATA HAS BEEN GRABBED + CLEANED
next step with this is to build script to grab multiple years of this data


In [43]:
def espn_MLB_free_agents(year):
    #URL
    url = (f"http://www.espn.com/mlb/freeagents/_/year/{year}/type/dollars")
    print(f"Checking url: {url}")
    response = requests.get(url)
    page = response.text
    
    #BS for web scraping - table is pure HTML
    soup = BeautifulSoup(page,"html5lib")
    
    #Col Names - just to make sure all tables are the same for safety
    col_name_all = soup.find_all(class_="colhead")[1]
    col_names_list=[]
    for name in col_name_all.find_all("td"):
        col_names_list.append(name.text)
        
    #Players are both in class oddrow and even row
    #Using regex to grab both at once
    rowplayer_regex = re.compile('.*row\splayer.*')
        
    #grab players, using regex
    free_agents_list = soup.find_all(class_=rowplayer_regex)
    
    free_agents_one_year = {} #empty dict to add player and his data to
    for row in free_agents_list: #go through all players on site
        items = row.find_all('td') #find table data elements
        player = items[0].find('a').text #name will be first column, take text from a tag
        free_agents_one_year[player] = [i.text for i in items[1:]] #rest of data for table
    
    #placeholder dataframe, we will add to master after cleansing
    df = pd.DataFrame.from_dict(data=free_agents_one_year,orient='index')
    df.reset_index(inplace=True)
    #reset index because player name shouldn't be index
    #We'll need it to join on our MLB hitting data
    
    #Data Cleaning Time
    #Rename Columns to match ESPN's
    col_names_dict = {"index":"PLAYER",0:"POS",1:"AGE",2:"STATUS",\
                     3:"OLD_TEAM",4:"NEW_TEAM",5:"YRS",6:"RK"\
                      ,7:"DOLLARS"}
    df.rename(columns=col_names_dict,inplace=True)
    #Next Step - Add Free Agent Year as Column
    df["PLAYER_YEAR"]=df["PLAYER"] + f" {year}"
    print(df.head())
    return df #return df, we will append outside function

    

In [45]:
import copy
free_agents_backup = copy.copy(free_agents_17df)
free_agents_master = free_agents_backup

In [46]:
for inp_year in range(2010,2017,1):
    df = pd.DataFrame()
    df = espn_MLB_free_agents(inp_year)
    free_agents_master.append(df)
    print(f"{inp_year} has been added to master")
    print(free_agents_master.head())
    print("**********************************")
    

Checking url: http://www.espn.com/mlb/freeagents/_/year/2010/type/dollars
          PLAYER POS AGE      STATUS   OLD_TEAM   NEW_TEAM YRS RK  \
0  Carl Crawford  LF  36  Signed (A)       Rays    Red Sox   7  2   
1   Jayson Werth  LF  38  Signed (A)   Phillies  Nationals   7  3   
2      Cliff Lee  SP  39  Signed (A)    Rangers   Phillies   5  1   
3  Adrian Beltre  3B  39  Signed (A)    Red Sox    Rangers   6  4   
4      Adam Dunn  DH  38  Signed (A)  Nationals  White Sox   4  6   

        DOLLARS         PLAYER_YEAR  
0  $142,000,000  Carl Crawford 2010  
1  $126,000,000   Jayson Werth 2010  
2  $120,000,000      Cliff Lee 2010  
3   $96,000,000  Adrian Beltre 2010  
4   $56,000,000      Adam Dunn 2010  
2010 has been added to master
          PLAYER POS AGE  STATUS      OLD_TEAM  NEW_TEAM YRS RK       DOLLARS  \
0    Eric Hosmer  1B  28  Signed        Royals    Padres   8  7  $144,000,000   
1     Yu Darvish  SP  31  Signed       Dodgers      Cubs   6  1  $126,000,000   
2  J.D. Ma

            PLAYER POS AGE  STATUS OLD_TEAM   NEW_TEAM YRS  RK       DOLLARS  \
0  Yoenis Cespedes  LF  32  Signed     Mets       Mets   4   1  $110,000,000   
1  Aroldis Chapman  RP  30  Signed     Cubs    Yankees   5  11   $86,000,000   
2    Dexter Fowler  RF  32  Signed     Cubs  Cardinals   5   2   $82,500,000   
3    Kenley Jansen  RP  30  Signed  Dodgers    Dodgers   5   8   $80,000,000   
4      Ian Desmond  LF  32  Signed  Rangers    Rockies   5  10   $70,000,000   

            PLAYER_YEAR  
0  Yoenis Cespedes 2016  
1  Aroldis Chapman 2016  
2    Dexter Fowler 2016  
3    Kenley Jansen 2016  
4      Ian Desmond 2016  
2016 has been added to master
          PLAYER POS AGE  STATUS      OLD_TEAM  NEW_TEAM YRS RK       DOLLARS  \
0    Eric Hosmer  1B  28  Signed        Royals    Padres   8  7  $144,000,000   
1     Yu Darvish  SP  31  Signed       Dodgers      Cubs   6  1  $126,000,000   
2  J.D. Martinez  RF  30  Signed  Diamondbacks   Red Sox   5  6  $110,000,000   
3   Loren

In [49]:
free_agents_master.sample(15)

Unnamed: 0,PLAYER,POS,AGE,STATUS,OLD_TEAM,NEW_TEAM,YRS,RK,DOLLARS,PLAYER_YEAR
30,Luke Gregerson,RP,33,Signed,Astros,Cardinals,2.0,NR,"$11,000,000",Luke Gregerson 2017
163,Fernando Salas,RP,32,Signed,Angels,Diamondbacks,,NR,Minor Lg,Fernando Salas 2017
27,Steve Cishek,RP,31,Signed,Rays,Cubs,2.0,NR,"$13,000,000",Steve Cishek 2017
110,Chris Carter,1B,31,Signed,Athletics,Angels,,NR,Minor Lg,Chris Carter 2017
22,Jason Vargas,RP,35,Signed,Royals,Mets,2.0,35,"$16,000,000",Jason Vargas 2017
114,Kyle Crockett,RP,26,Signed,Reds,Reds,,NR,Minor Lg,Kyle Crockett 2017
157,Colby Rasmus,LF,31,Signed,Rays,Orioles,,NR,Minor Lg,Colby Rasmus 2017
107,Blaine Boyer,RP,36,Signed,Red Sox,Royals,,NR,Minor Lg,Blaine Boyer 2017
8,Jay Bruce,RF,31,Signed,Indians,Mets,3.0,12,"$39,000,000",Jay Bruce 2017
53,Miguel Gonzalez,SP,33,Signed,Rangers,White Sox,1.0,NR,"$4,750,000",Miguel Gonzalez 2017
