In [263]:
#!/usr/bin/env python
import pdfplumber
import pandas as pd
import string
import re
import sys, os
import urllib

In [324]:
# Column layout shared by every per-season salary table built in this notebook.
column_names = ["club", "last_name", "first_name", "position", "base_salary", "guaranteed_compensation"]

# Single-letter position codes offered to get_best_match when cleaning the
# "position" column (presumably forward / midfielder / defender / goalkeeper).
# Each code appears once; list order decides ties because the first best match wins.
positions = ["F", "M", "D", "G"]

# Canonical club names that raw club strings are fuzzy-matched against.
clubs = ['Atlanta United', 'Austin FC', 'Charlotte FC', 'Chicago Fire', 'Chivas USA',
         'Colorado Rapids', 'Columbus Crew', 'DC United', 'FC Cincinnati',
         'FC Dallas', 'Houston Dynamo', 'Inter Miami', 'LA Galaxy', 'Los Angeles FC',
         'Major League Soccer', 'Minnesota United', 'Montreal Impact',
         'Nashville SC', 'New England Revolution', 'New York City FC',
         'New York Red Bulls', 'Orlando City SC', 'Philadelphia Union',
         'Portland Timbers', 'Real Salt Lake', 'San Jose Earthquakes',
         'Seattle Sounders FC', 'Sporting Kansas City', 'St. Louis City SC',
         'Toronto FC', 'Vancouver Whitecaps', 'Pool']

# Club abbreviation -> canonical club name (every value also appears in `clubs`).
club_abr = {
    'ATL': 'Atlanta United',
    'AUS': 'Austin FC',
    'CLT': 'Charlotte FC',
    'CHI': 'Chicago Fire',
    'CHV': 'Chivas USA',
    'COL': 'Colorado Rapids',
    'CLB': 'Columbus Crew',
    'DC': 'DC United',
    'CIN': 'FC Cincinnati',
    'DAL': 'FC Dallas',
    'HOU': 'Houston Dynamo',
    'MIA': 'Inter Miami',
    'LA': 'LA Galaxy',
    'LAFC': 'Los Angeles FC',
    'MLS': 'Major League Soccer',
    'MIN': 'Minnesota United',
    'MTL': 'Montreal Impact',
    'NSH': 'Nashville SC',
    'NE': 'New England Revolution',
    'NYCFC': 'New York City FC',
    'NYRB': 'New York Red Bulls',
    'ORL': 'Orlando City SC',
    'PHI': 'Philadelphia Union',
    'POR': 'Portland Timbers',
    'RSL': 'Real Salt Lake',
    'SJ': 'San Jose Earthquakes',
    'SEA': 'Seattle Sounders FC',
    'SKC': 'Sporting Kansas City',
    'STL': 'St. Louis City SC',
    'TOR': 'Toronto FC',
    'VAN': 'Vancouver Whitecaps',
    'Pool': 'Pool'
}

# Canonical club name -> conference label. The mapping is static, i.e. one
# conference per club for all seasons (NOTE(review): confirm this is acceptable
# for clubs that switched conference between seasons).
# The literal 'N/A' is parsed as a missing value by pandas.read_csv's default
# NA handling, so those entries come back as NaN after a CSV round trip.
team_region_mapping = {
    'Atlanta United': 'Eastern',
    'Austin FC': 'Western',
    'Charlotte FC': 'Eastern',
    'Chicago Fire': 'Eastern',
    'Chivas USA': 'Western',
    'Colorado Rapids': 'Western',
    'Columbus Crew': 'Eastern',
    'DC United': 'Eastern',
    'FC Cincinnati': 'Eastern',
    'FC Dallas': 'Western',
    'Houston Dynamo': 'Western',
    'Inter Miami': 'Eastern',
    'LA Galaxy': 'Western',
    'Los Angeles FC': 'Western',
    'Major League Soccer': 'N/A',
    'Minnesota United': 'Western',
    'Montreal Impact': 'Eastern',
    'Nashville SC': 'Western',
    'New England Revolution': 'Eastern',
    'New York City FC': 'Eastern',
    'New York Red Bulls': 'Eastern',
    'Orlando City SC': 'Eastern',
    'Philadelphia Union': 'Eastern',
    'Portland Timbers': 'Western',
    'Real Salt Lake': 'Western',
    'San Jose Earthquakes': 'Western',
    'Seattle Sounders FC': 'Western',
    'Sporting Kansas City': 'Western',
    'St. Louis City SC': 'Western',
    'Toronto FC': 'Eastern',
    'Vancouver Whitecaps': 'Western',
    'Pool': 'N/A'
}

def combine_elements(source_list, index1, index2):
    """Fuse two entries of ``source_list`` into one, modifying the list in place.

    The values at ``index1`` and ``index2`` are joined with ``+``. The entry at
    ``index2`` is then removed and the joined value is written to position
    ``index1`` of the shortened list (so when ``index2 < index1`` the write
    lands one element further right than the original ``index1`` entry).

    Returns:
        The very same list object that was passed in.
    """
    merged = source_list[index1] + source_list[index2]
    source_list.pop(index2)
    source_list[index1] = merged
    return source_list
    

def get_best_match(entry, option_list=clubs):
    """Return the element of ``option_list`` that is closest to ``entry``.

    Closeness is the value returned by ``calc_lev_distance(entry, option)``.
    Options are scanned in order and a candidate only replaces the current
    best when its distance is strictly smaller, so the first of several
    equally close options wins.

    Returns:
        The closest option, or ``None`` when ``entry`` is ``None`` or when no
        option produced a distance that compares below infinity (e.g. an
        empty ``option_list``).
    """
    if entry is None:
        return None

    closest, closest_distance = None, float('inf')
    for candidate in option_list:
        candidate_distance = calc_lev_distance(entry, candidate)
        if candidate_distance < closest_distance:
            closest, closest_distance = candidate, candidate_distance
    return closest

def calc_lev_distance(string1, string2):
    try:
        m = len(string1)
        n = len(string2)
    except TypeError:
        return(string1)

    # Create a matrix of size (m+1)x(n+1) to store the distances
    dp = [[0] * (n + 1) for _ in range(m + 1)]

    # Initialize the first row and column of the matrix
    for i in range(m + 1):
        dp[i][0] = i
    for j in range(n + 1):
        dp[0][j] = j

    # Calculate the distances using dynamic programming
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if string1[i - 1] == string2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]
            else:
                dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])

    # Return the Levenshtein distance between the two strings
    return dp[m][n]

In [265]:
# Parse the 2018 salary PDF into a DataFrame with the shared `column_names` layout.
# Rows are collected in a list and the frame is built once at the end, and the
# PDF is opened in a `with` block so it is closed even if parsing fails.
non_word_or_dollar = re.compile(r"[^\w$]")
rows_2018 = []
with pdfplumber.open('data/mls_salaries/PDF/mls_salaries_2018.pdf') as pdf:
    for page in pdf.pages:
        table = page.extract_table(table_settings={"vertical_strategy": "text",
                                                   "horizontal_strategy": "text",
                                                   "snap_tolerance": 9})
        # The first four extracted rows of every page are discarded
        # (presumably title/header rows - confirm against the PDF).
        del table[:4]
        for line in table:
            # Re-join cells that pdfplumber split in the wrong place.
            line = combine_elements(line, 0, 1)
            line = combine_elements(line, 4, 5)
            line = combine_elements(line, 5, 6)
            if len(line) != 6:
                line = combine_elements(line, 0, 1)
            # Fuse the last three cells into one string so it can be re-split on '$'.
            line = combine_elements(line, 3, 4)
            line = combine_elements(line, 3, 4)
            # Drop everything except word characters and '$' from the fused cell,
            # then split it back into separate cells at each '$'.
            fused = non_word_or_dollar.sub('', line[-1])
            del line[-1]
            line.extend(fused.split('$'))
            # Fail fast on malformed rows instead of letting pandas pad them.
            if len(line) != len(column_names):
                raise ValueError(
                    f"2018 PDF row has {len(line)} cells, expected {len(column_names)}: {line!r}"
                )
            rows_2018.append(line)

mls_salaries_2018_df = pd.DataFrame(rows_2018, columns=column_names)
mls_salaries_2018_df["position"] = mls_salaries_2018_df["position"].apply(lambda x: get_best_match(x, positions))
for money_column in ("base_salary", "guaranteed_compensation"):
    # The regex above also removed the decimal point, hence the division by 100
    # (assumes every amount was printed with two decimals - TODO confirm).
    mls_salaries_2018_df[money_column] = pd.to_numeric(mls_salaries_2018_df[money_column]) / 100
mls_salaries_2018_df

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
0,New York Red Bulls,Abang,Anatole,F,68927.00,68927.00
1,New York City FC,Abdul-Salaam,Saad,D,106480.00,120230.00
2,Columbus Crew,Abu,Mohammed,M,175008.00,181258.00
3,Columbus Crew,Abubakar,Lalas,D,68250.04,75750.04
4,PhiladelphiaUnion,Accam,David,F,1250000.04,1250000.04
...,...,...,...,...,...,...
689,FC Dallas,Ziegler,Reto,D,750000.00,800000.00
690,LAFC,Zimmerman,Walker,D,235000.00,235000.00
691,Atlanta United,Zizzo,Sal,D,129999.96,129999.96
692,FC Dallas,Zobeck,Kyle,G,67500.00,67500.00


In [266]:
# Save the 2018 table as CSV; index=False keeps the DataFrame index out of the file.
mls_salaries_2018_df.to_csv('data/mls_salaries/mls-salaries-2018.csv', index=False)

In [267]:
# Parse the 2019 salary PDF into a DataFrame with the shared `column_names` layout.
# Rows are collected in a list and the frame is built once at the end, and the
# PDF is opened in a `with` block so it is closed even if parsing fails.
non_word_or_dollar = re.compile(r"[^\w$]")
rows_2019 = []
with pdfplumber.open('data/mls_salaries/PDF/mls_salaries_2019.pdf') as pdf:
    for i, page in enumerate(pdf.pages):
        table = page.extract_table(table_settings={"vertical_strategy": "text",
                                                   "horizontal_strategy": "text",
                                                   "snap_tolerance": 5})
        # Only the first page loses its first extracted row
        # (presumably the header - confirm against the PDF).
        if i == 0:
            del table[:1]
        for line in table:
            # Re-join the first two cells, which pdfplumber split in the wrong place.
            line = combine_elements(line, 0, 1)

            # Fuse the last three cells into one string so it can be re-split on '$'.
            line = combine_elements(line, 3, 4)
            line = combine_elements(line, 3, 4)
            # Drop everything except word characters and '$' from the fused cell,
            # then split it back into separate cells at each '$'.
            fused = non_word_or_dollar.sub('', line[-1])
            del line[-1]
            line.extend(fused.split('$'))
            # Fail fast on malformed rows instead of letting pandas pad them.
            if len(line) != len(column_names):
                raise ValueError(
                    f"2019 PDF row has {len(line)} cells, expected {len(column_names)}: {line!r}"
                )
            rows_2019.append(line)

mls_salaries_2019_df = pd.DataFrame(rows_2019, columns=column_names)
mls_salaries_2019_df["position"] = mls_salaries_2019_df["position"].apply(lambda x: get_best_match(x, positions))
for money_column in ("base_salary", "guaranteed_compensation"):
    # The regex above also removed the decimal point, hence the division by 100
    # (assumes every amount was printed with two decimals - TODO confirm).
    mls_salaries_2019_df[money_column] = pd.to_numeric(mls_salaries_2019_df[money_column]) / 100
mls_salaries_2019_df

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
0,PhiladelphiaUnion,Aaronson,Brenden,F,70000.08,98309.48
1,Seattle Sounders FC,Abdul-Salaam,Saad,D,70250.04,70250.04
2,Colorado Rapids,Abubakar,Lalas,D,135000.00,144937.50
3,Columbus Crew,Accam,David,F,1010004.00,1137920.00
4,Colorado Rapids,Acosta,Kellyn,M,549999.96,664999.96
...,...,...,...,...,...,...
709,FC Dallas,Ziegler,Reto,D,820000.08,870000.08
710,LAFC,Zimmerman,Walker,D,600000.00,600000.00
711,FC Dallas,Zobeck,Kyle,G,70875.00,70875.00
712,LA Galaxy,Zubak,Ethan,F,75000.00,78125.00


In [268]:
# Save the 2019 table as CSV; index=False keeps the DataFrame index out of the file.
mls_salaries_2019_df.to_csv('data/mls_salaries/mls-salaries-2019.csv', index=False)

In [269]:
# Parse the 2020 salary PDF into a DataFrame with the shared `column_names` layout.
# Rows are collected in a list and the frame is built once at the end, and the
# PDF is opened in a `with` block so it is closed even if parsing fails.
# Salary columns are left as the digit strings produced by the split below.
non_word_or_dollar = re.compile(r"[^\w$]")
rows_2020 = []
with pdfplumber.open('data/mls_salaries/PDF/mls_salaries_2020.pdf') as pdf:
    for page in pdf.pages:
        table = page.extract_table(table_settings={"vertical_strategy": "text",
                                                   "horizontal_strategy": "text",
                                                   "snap_tolerance": 8})
        # The first two extracted rows of every page are discarded
        # (presumably title/header rows - confirm against the PDF).
        del table[:2]
        for line in table:
            # Re-join cells that pdfplumber split in the wrong place.
            line = combine_elements(line, 2, 3)
            if len(line) == 7:
                line = combine_elements(line, 2, 3)
            if len(line) == 5:
                # The position code is glued to the end of cell 2: peel it off
                # into a cell of its own (first matching code wins).
                for pos in positions:
                    if line[2].endswith(pos):
                        line[2] = line[2][:-len(pos)]
                        line.insert(3, pos)
                        break

            # The two name cells come first in this PDF; move them behind the
            # third cell so the row follows the `column_names` order.
            player_name = line[:2]
            del line[:2]
            line.insert(1, player_name[0])
            line.insert(2, player_name[1])

            # NOTE(review): rows that still lack a cell get a 'G' position inserted -
            # presumably these are goalkeeper rows; confirm against the PDF.
            if len(line) != 6:
                line.insert(3, 'G')
            # Fuse the last three cells into one string so it can be re-split on '$'.
            line = combine_elements(line, 3, 4)
            line = combine_elements(line, 3, 4)
            # Drop everything except word characters and '$' from the fused cell,
            # then split it back into separate cells at each '$'.
            fused = non_word_or_dollar.sub('', line[-1])
            del line[-1]
            line.extend(fused.split('$'))
            # Fail fast on malformed rows instead of letting pandas pad them.
            if len(line) != len(column_names):
                raise ValueError(
                    f"2020 PDF row has {len(line)} cells, expected {len(column_names)}: {line!r}"
                )
            rows_2020.append(line)

mls_salaries_2020_df = pd.DataFrame(rows_2020, columns=column_names)
mls_salaries_2020_df["position"] = mls_salaries_2020_df["position"].apply(lambda x: get_best_match(x, positions))
mls_salaries_2020_df["position"] = mls_salaries_2020_df["position"].str.extract(r'([A-Z])')
mls_salaries_2020_df

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
0,PhiladelphiaUnion,Aaronson,Brenden,M,85000,103309
1,FC Cincinnati,Abdul-Salaam,Saad,D,81375,90042
2,DC United,Abu,Mohammed,M,81375,81375
3,Colorado Rapids,Abubakar,Lalas,D,145000,154937
4,Nashville SC,Accam,David,F,875000,1002916
...,...,...,...,...,...,...
773,New York RedBulls,,Kaku,M,850000,850000
774,Orlando City SC,,Robinho,M,198000,205500
775,Philadelphia Union,,Ilsinho,M,425000,476250
776,San Jose Earthquakes,,Judson,M,320000,325000


In [270]:
# Save the 2020 table as CSV; index=False keeps the DataFrame index out of the file.
mls_salaries_2020_df.to_csv('data/mls_salaries/mls-salaries-2020.csv', index=False)

In [271]:
# Parse the 2021 salary PDF into a DataFrame with the shared `column_names` layout.
# This PDF is extracted with the "lines" strategies and its rows are used as-is.
# Rows are collected in a list and the frame is built once at the end, and the
# PDF is opened in a `with` block so it is closed even if parsing fails.
rows_2021 = []
with pdfplumber.open('data/mls_salaries/PDF/mls_salaries_2021.pdf') as pdf:
    for i, page in enumerate(pdf.pages):
        table = page.extract_table(table_settings={"vertical_strategy": "lines",
                                                   "horizontal_strategy": "lines",
                                                   "snap_tolerance": 8})
        # Only the first page loses its first two extracted rows
        # (presumably title/header rows - confirm against the PDF).
        if i == 0:
            del table[:2]
        for line in table:
            # Fail fast on malformed rows instead of letting pandas pad them.
            if len(line) != len(column_names):
                raise ValueError(
                    f"2021 PDF row has {len(line)} cells, expected {len(column_names)}: {line!r}"
                )
            rows_2021.append(line)

mls_salaries_2021_df = pd.DataFrame(rows_2021, columns=column_names)
mls_salaries_2021_df["position"] = mls_salaries_2021_df["position"].apply(lambda x: get_best_match(x, positions))
for money_column in ("base_salary", "guaranteed_compensation"):
    # Strip the '$' sign and thousands separators before converting to float.
    mls_salaries_2021_df[money_column] = (
        mls_salaries_2021_df[money_column]
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
mls_salaries_2021_df

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
0,Atlanta United,Adams,"Mohammed ""Mo""",M,121000.0,141000.0
1,Atlanta United,Ambrose,Mikey,D,81375.0,81375.0
2,Atlanta United,Araujo,Luiz,F,3600000.0,3941667.0
3,Atlanta United,Barco,Ezequiel,M,1875000.0,2358333.0
4,Atlanta United,Bauer,Josh,D,63547.0,63547.0
...,...,...,...,...,...,...
866,Vancouver Whitecaps,Sousa e Silva,Caio Alexandre,M,500000.0,539583.0
867,Vancouver Whitecaps,Teibert,Russell,M,350000.0,387500.0
868,Vancouver Whitecaps,Veselinovic,Ranko,D,360000.0,445500.0
869,Vancouver Whitecaps,Vite,Pedro,M,556000.0,609997.0


In [272]:
# Save the 2021 table as CSV; index=False keeps the DataFrame index out of the file.
mls_salaries_2021_df.to_csv('data/mls_salaries/mls-salaries-2021.csv', index=False)

In [273]:
# Parse the 2022 salary PDF into a DataFrame with the shared `column_names` layout.
# This PDF is extracted with the "lines" strategies and its rows are used as-is.
# Rows are collected in a list and the frame is built once at the end, and the
# PDF is opened in a `with` block so it is closed even if parsing fails.
rows_2022 = []
with pdfplumber.open('data/mls_salaries/PDF/mls_salaries_2022.pdf') as pdf:
    for i, page in enumerate(pdf.pages):
        table = page.extract_table(table_settings={"vertical_strategy": "lines",
                                                   "horizontal_strategy": "lines",
                                                   "snap_tolerance": 9})
        # Only the first page loses its first extracted row
        # (presumably the header - confirm against the PDF).
        if i == 0:
            del table[:1]
        for line in table:
            # Fail fast on malformed rows instead of letting pandas pad them.
            if len(line) != len(column_names):
                raise ValueError(
                    f"2022 PDF row has {len(line)} cells, expected {len(column_names)}: {line!r}"
                )
            rows_2022.append(line)

mls_salaries_2022_df = pd.DataFrame(rows_2022, columns=column_names)
mls_salaries_2022_df["position"] = mls_salaries_2022_df["position"].apply(lambda x: get_best_match(x, positions))
# Remove any run of lowercase letters at the very start of a last name
# (presumably extraction residue from a neighbouring cell - TODO confirm).
mls_salaries_2022_df["last_name"] = mls_salaries_2022_df["last_name"].str.replace(r'^[a-z]+', '', regex=True)
for money_column in ("base_salary", "guaranteed_compensation"):
    # Strip the '$' sign and thousands separators before converting to float.
    mls_salaries_2022_df[money_column] = (
        mls_salaries_2022_df[money_column]
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
mls_salaries_2022_df

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
0,Atlanta United,Almada,Thiago,F,1650000.0,2332000.0
1,Atlanta United,Alonso,Osvaldo,M,84000.0,84000.0
2,Atlanta United,Ambrose,Mikey,D,85444.0,85444.0
3,Atlanta United,Araujo,Luiz,F,3600000.0,4480333.0
4,Atlanta United,Barco,Ezequiel,M,2200000.0,2200000.0
...,...,...,...,...,...,...
889,Vancouver Whit,Scoaupssa e Silva,Caio Alexand,M,500000.0,562500.0
890,Vancouver Whit,Tceaibpesrt,Russell,M,400000.0,449375.0
891,Vancouver Whit,Vceaspeslinovic,Ranko,D,405000.0,490500.0
892,Vancouver Whit,Vcitaeps,Pedro,M,556000.0,609997.0


In [274]:
# Save the 2022 table as CSV; index=False keeps the DataFrame index out of the file.
mls_salaries_2022_df.to_csv('data/mls_salaries/mls-salaries-2022.csv', index=False)

In [275]:
# Parse the 2023 salary PDF into a DataFrame with the shared `column_names` layout.
# This PDF is extracted with the "lines" strategies and its rows are used as-is.
# Rows are collected in a list and the frame is built once at the end, and the
# PDF is opened in a `with` block so it is closed even if parsing fails.
rows_2023 = []
with pdfplumber.open('data/mls_salaries/PDF/mls_salaries_2023.pdf') as pdf:
    for page in pdf.pages:
        table = page.extract_table(table_settings={"vertical_strategy": "lines",
                                                   "horizontal_strategy": "lines",
                                                   "snap_tolerance": 8})
        # The first extracted row of every page is discarded
        # (presumably a repeated header - confirm against the PDF).
        del table[:1]
        for line in table:
            # Fail fast on malformed rows instead of letting pandas pad them.
            if len(line) != len(column_names):
                raise ValueError(
                    f"2023 PDF row has {len(line)} cells, expected {len(column_names)}: {line!r}"
                )
            rows_2023.append(line)

mls_salaries_2023_df = pd.DataFrame(rows_2023, columns=column_names)
mls_salaries_2023_df["position"] = mls_salaries_2023_df["position"].apply(lambda x: get_best_match(x, positions))
mls_salaries_2023_df["position"] = mls_salaries_2023_df["position"].str.extract(r'([A-Z])')
for money_column in ("base_salary", "guaranteed_compensation"):
    # Strip the '$' sign and thousands separators before converting to float.
    mls_salaries_2023_df[money_column] = (
        mls_salaries_2023_df[money_column]
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
mls_salaries_2023_df

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation
0,Atlanta United,McFadden,Aiden,D,85444.0,85444.0
1,Atlanta United,Fortune,Ajani,M,67360.0,67360.0
2,Atlanta United,Sejdic,Amar,D,85444.0,92111.0
3,Atlanta United,Gutman,Andrew,D,350000.0,381250.0
4,Atlanta United,Guzan,Brad,G,600000.0,612500.0
...,...,...,...,...,...,...
864,Vancouver Whitecaps,Córdova,Sergio,F,981818.0,1050568.0
865,Vancouver Whitecaps,Becher,Simon,F,67360.0,70485.0
866,Vancouver Whitecaps,Hasal,Thomas,G,203000.0,222412.0
867,Vancouver Whitecaps,Blackmon,Tristan,D,400000.0,451208.0


In [276]:
# Save the 2023 table as CSV; index=False keeps the DataFrame index out of the file.
mls_salaries_2023_df.to_csv('data/mls_salaries/mls-salaries-2023.csv', index=False)

In [327]:
# Combine every per-season CSV in `directory` into one DataFrame, tagging each
# row with the season taken from the file name.
directory = "data/mls_salaries/"
season_frames = []
# sorted() makes the row order independent of the arbitrary order os.listdir returns.
for filename in sorted(os.listdir(directory)):
    year = filename[-8:-4]  # the four characters right before ".csv"
    # Only per-season files end in a four-digit year. This skips any other CSV in
    # the folder - in particular mls_salaries_masterlist.csv, which this notebook
    # writes into the same directory and which would otherwise be read back in
    # (duplicating every row with year "list") when the cell is re-run.
    if not (filename.endswith(".csv") and len(year) == 4 and year.isdigit()):
        continue

    season_df = pd.read_csv(os.path.join(directory, filename))
    season_df['year'] = year  # kept as the string sliced from the file name
    season_frames.append(season_df)

# One concat at the end instead of re-copying the growing frame per file;
# fall back to an empty frame when no season file was found.
mls_salaries_df = pd.concat(season_frames, ignore_index=True) if season_frames else pd.DataFrame()
mls_salaries_df

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation,year
0,CHI,Armas,Chris,M,225000.0,225000.0,2007
1,CHI,Banner,Michael,M,12900.0,12900.0,2007
2,CHI,Barrett,Chad,F,41212.5,48712.5,2007
3,CHI,Blanco,Cuauhtemoc,F,2492316.0,2666778.0,2007
4,CHI,Brown,C.J.,D,106391.0,106391.0,2007
...,...,...,...,...,...,...,...
10368,Vancouver Whitecaps,Córdova,Sergio,F,981818.0,1050568.0,2023
10369,Vancouver Whitecaps,Becher,Simon,F,67360.0,70485.0,2023
10370,Vancouver Whitecaps,Hasal,Thomas,G,203000.0,222412.0,2023
10371,Vancouver Whitecaps,Blackmon,Tristan,D,400000.0,451208.0,2023


In [328]:
# Normalise the combined table: expand club abbreviations, snap position and
# club strings to their closest canonical value, then derive the conference
# from the cleaned club name.
mls_salaries_df['club'] = mls_salaries_df['club'].replace(club_abr)
mls_salaries_df['position'] = mls_salaries_df['position'].apply(get_best_match, option_list=positions)
mls_salaries_df['club'] = mls_salaries_df['club'].apply(get_best_match, option_list=clubs)
mls_salaries_df['conference'] = mls_salaries_df['club'].map(team_region_mapping)

In [336]:
# Save the combined, normalised table as the master list. The file is written
# into the same directory as the per-season CSVs; index=False keeps the
# DataFrame index out of the file.
mls_salaries_df.to_csv('data/mls_salaries/mls_salaries_masterlist.csv', index=False)



In [337]:
# Reload the master list from disk (rebinding mls_salaries_df) and display 20 randomly chosen rows.
# NOTE(review): sample() is called without random_state, so the rows shown differ between runs.
mls_salaries_df = pd.read_csv("data/mls_salaries/mls_salaries_masterlist.csv")
mls_salaries_df.sample(20)

Unnamed: 0,club,last_name,first_name,position,base_salary,guaranteed_compensation,year,conference
3001,Chivas USA,Mejia,Edgar,M,140000.0,140000.0,2013,Western
1856,FC Dallas,Lambo,Josh,G,100000.0,133000.0,2011,Western
310,Pool,Wolyniec,John,F,65625.0,65625.0,2007,
749,Chivas USA,Vaughn,Lawson,D,33000.0,33000.0,2008,Western
5889,Orlando City SC,Kljestan,Sacha,M,1025000.04,1100000.04,2018,Eastern
8248,New England Revolution,nKizza,Edward,F,63547.0,63547.0,2021,Eastern
5764,New England Revolution,Fagundez,Diego,M,170000.0,190000.0,2018,Eastern
2342,Portland Timbers,Jewsbury,Jack,D,180000.0,189750.0,2012,Western
4525,Houston Dynamo,Deric,Tyler,G,170000.0,170000.0,2016,Western
8838,DC United,Berry,Miguel,F,120000.0,120000.0,2022,Eastern


In [338]:
# Load the MLB salary CSV and display 20 randomly chosen rows.
# NOTE(review): sample() is called without random_state, so the rows shown differ between runs.
mlb_salaries_df = pd.read_csv("data/mlb_salaries.csv")
mlb_salaries_df.sample(20)

Unnamed: 0,playerid,player_name,weight,height,bats,throws,season,league,teamid,team,franchise,salary
4457,cedenro02,Ronny Cedeno,195,72,R,R,2009,AL,SEA,Seattle Mariners,Seattle Mariners,822500
12144,incavpe01,Pete Incaviglia,225,73,R,R,1988,AL,TEX,Texas Rangers,Texas Rangers,275000
3210,bruceja01,Jay Bruce,225,75,L,L,2012,NL,CIN,Cincinnati Reds,Cincinnati Reds,5041666
19373,parrist01,Steve Parris,190,72,R,R,2003,AL,TBA,Tampa Bay Devil Rays,Tampa Bay Rays,400000
27958,wynnema01,Marvell Wynne,175,71,L,L,1988,NL,SDN,San Diego Padres,San Diego Padres,255000
13967,lambmi01,Mike Lamb,205,73,L,R,2004,NL,HOU,Houston Astros,Houston Astros,352000
1400,batismi01,Miguel Batista,210,73,R,R,2001,NL,ARI,Arizona Diamondbacks,Arizona Diamondbacks,400000
6381,diazca01,Carlos Diaz,161,72,R,L,1985,NL,LAN,Los Angeles Dodgers,Los Angeles Dodgers,120000
6647,drabedo01,Doug Drabek,185,73,R,R,1991,NL,PIT,Pittsburgh Pirates,Pittsburgh Pirates,3350000
11189,hernaor01,Orlando Hernandez,210,74,R,R,2001,AL,NYA,New York Yankees,New York Yankees,2050000
