# Mapping Player IDs between FanGraphs and StatCast
Using the data from the [Chadwick Bureau](https://github.com/chadwickbureau), map each player's FanGraphs ID to their StatCast ID (StatCast uses MLBAM IDs).

In [1]:
import pandas as pd
import warnings

warnings.filterwarnings("ignore", category=pd.errors.DtypeWarning)

In [2]:
# Read data
# The register is split across 16 CSV files whose names end in a single hex
# digit (people-0.csv ... people-f.csv); read them in that order.
people_frames = [
    pd.read_csv(f"data/chadwick-bureau/people-{shard}.csv")
    for shard in "0123456789abcdef"
]

# Columns kept from the register; everything else is dropped.
id_columns = [
    "key_person",
    "key_uuid",
    "key_mlbam",
    "key_retro",
    "key_bbref",
    "key_bbref_minors",
    "key_fangraphs",
    "name_last",
    "name_first",
    "name_given",
    "pro_played_first",
    "pro_played_last",
]

# Columns that must be populated and are converted to int below.
int_columns = [
    "key_fangraphs",
    "key_mlbam",
    "pro_played_first",
    "pro_played_last",
]

# Build the cleaned table as one chain so every step returns a new frame;
# assigning columns into a filtered frame can raise SettingWithCopyWarning.
id_data = (
    # Concatenate data (row labels from each file are kept as-is, so index
    # values may repeat across files)
    pd.concat(people_frames, axis=0)
    # Remove unnecessary columns
    [id_columns]
    # Remove any players without a FanGraphs/StatCast id, or with a missing
    # pro_played_first / pro_played_last
    .dropna(subset=int_columns)
    # Change column type from float to int (possible once the NaNs are gone)
    .astype({column: int for column in int_columns})
)

id_data

Unnamed: 0,key_person,key_uuid,key_mlbam,key_retro,key_bbref,key_bbref_minors,key_fangraphs,name_last,name_first,name_given,pro_played_first,pro_played_last
49,000539fc,000539fc-40b1-4bc4-9764-2941d18f398c,605152,bradj002,bradlje01,bradle000jed,13166,Bradley,Jed,Jedidiah Custer,2011,2019
56,000638f5,000638f5-74e9-4e4f-99d2-47969ae2d7a8,110625,barrm002,barrima01,barrio001man,1000605,Barrios,Manuel,Manuel Antonio,1994,2003
59,0007057d,0007057d-9e3c-4db2-9ceb-989cd788605e,118336,martf102,martifr01,martin001fra,1008165,Martin,Frank,Frank Joseph,1897,1905
69,00087a47,00087a47-2151-4fcd-8952-98e915e88143,111603,browb101,brownby01,browne002byr,1001500,Browne,Byron,Byron Ellis,1963,1975
122,00109852,00109852-1ac0-4751-b0ed-11ab819d8ba3,113051,darir101,darinro01,daring001rol,1002986,Daringer,Rolla,Rolla Harrison,1910,1921
...,...,...,...,...,...,...,...,...,...,...,...,...
32189,ffef3420,ffef3420-0132-4e68-86c9-615fd859e9e8,120175,pagev101,pageva01,page--001van,1009957,Page,Vance,Vance Linwood,1926,1942
32235,fff5491d,fff5491d-9cb9-48af-a4b6-67796f7f3ceb,124274,wilkr101,wilkiro01,wilkin003roy,1013961,Wilkinson,Roy,Roy Hamilton,1913,1932
32261,fff74cf7,fff74cf7-a005-4a4a-b361-95b13908b65d,113250,demaa101,demaral01,demare001alb,1003187,Demaree,Al,Albert Wentworth,1908,1924
32290,fffb2763,fffb2763-f758-4d94-af65-ee60117fea4d,118767,mckef101,mckeefr01,mckee-001fra,1008583,McKee,Frank,Frank,1884,1884


## Create mapping

In [None]:
# Load the stats table, keeping only the player name and its StatCast id
df_mlb_stats = pd.read_csv("data/full-mlb-stats.csv").loc[:, ["Name", "StatCast_ID"]]

# Inner join: keep only rows whose StatCast id matches an MLBAM id in id_data
player_mapping = pd.merge(
    df_mlb_stats,
    id_data,
    left_on="StatCast_ID",
    right_on="key_mlbam",
    how="inner",
)

# Preview just the name and the two ids being mapped
player_mapping[["Name", "StatCast_ID", "key_fangraphs"]]

Unnamed: 0,Name,StatCast_ID,key_fangraphs
0,Carlos Ruiz,434563,2579
1,Avisaíl García,541645,5760
2,Allan Dykstra,488852,9113
3,Jimmy Nelson,519076,10547
4,Matt Garza,490063,3340
...,...,...,...
7125,Eric Haase,606992,14111
7126,Spencer Steer,668715,26323
7127,Yandy Díaz,650490,16578
7128,Jose Herrera,645444,17040


In [16]:
# Save data
# Write the merged mapping to CSV, leaving the DataFrame index out of the file
player_mapping.to_csv(path_or_buf="data/player_mapping.csv", index=False)