# Prediction

In [2]:
# Libraries
import pandas as pd
pd.set_option('display.max_columns', None)
import os

In [3]:
data = pd.read_csv("../preproc_data/final_full_dataset.csv", index_col=0, parse_dates=["at_bat_end_time"])
data = data.sort_values(by="at_bat_end_time", ascending=False)
data["hitter_ab_count"] = data.groupby('hitter_id')['hitter_id'].transform('count')
data["pitcher_ab_count"] = data.groupby('pitcher_id')['pitcher_id'].transform('count')
data.head(2)

Unnamed: 0,hitter_id,hitter_hand,pitcher_id,pitcher_hand,at_bat_end_time,hitter_position,hitter_previous_stats_szn,rolling_1ab,rolling_3ab,rolling_10ab,hitter_previous_stats_szn_slug,rolling_1ab_slug,rolling_3ab_slug,rolling_10ab_slug,pitcher_previous_stats_szn,rolling_1pitch,rolling_3pitch,rolling_10pitch,pitcher_previous_stats_szn_bases,rolling_1pitch_bases,rolling_3pitch_bases,rolling_10pitch_bases,handed_matchup,match_up_ab_count_delta,hitter_strikes_eff,hitter_balls_eff,pitcher_strikes_spread,pitcher_balls_spread,hitter_success_speed,pitcher_speed,hitter_fast_eff,hitter_offspeed_eff,pitcher_fast_spread,pitcher_offspeed_spread,y_target,hitter_ab_count,pitcher_ab_count
142306,a522f82a-e920-4ece-8e29-3f16630bf635,L,22dabc06-611e-4d36-81af-7153e7ba86e6,R,2023-08-23 04:31:54+00:00,OF,0.370518,0.0,0.333333,0.1,0.438247,0.0,0.333333,0.1,0.281553,0.0,0.666667,0.4,0.407767,0.0,0.666667,0.4,1,-45,0.292553,0.603175,0.665049,0.334951,89.776344,88.861463,0.368421,0.375,0.533981,0.466019,0,247,201
142305,a8efa694-2d64-4ad3-bf39-7d4f9006be7f,R,22dabc06-611e-4d36-81af-7153e7ba86e6,R,2023-08-23 04:30:56+00:00,C,0.355769,0.0,0.333333,0.2,0.548077,0.0,1.333333,0.5,0.282927,1.0,1.0,0.4,0.409756,1.0,1.0,0.4,0,-3,0.35461,0.358209,0.663415,0.336585,89.832432,88.841176,0.387097,0.309524,0.531707,0.468293,0,205,201


In [4]:
players = pd.read_csv("../raw_data/players.csv")
players = players[~players.id.duplicated(keep="first")]

## Pitcher data set

In [5]:
pitchers = players[players.position == "P"][["id", "first_name", "last_name", "team_nickname", "primary_position"]]
pitchers.head(2)

Unnamed: 0,id,first_name,last_name,team_nickname,primary_position
14,71791d18-9c59-4e2f-863d-007d3cdd7efd,Luis,Castillo,Mariners,SP
15,d3a08e67-05e9-4f54-b7b1-807c8e9beaf2,Logan,Gilbert,Mariners,SP


In [6]:
pitchers = pitchers.merge(data.drop_duplicates(subset='pitcher_id', keep='first')
               [["pitcher_id", "pitcher_hand", "pitcher_previous_stats_szn", "rolling_1pitch",
                 "rolling_3pitch", "rolling_10pitch", "pitcher_previous_stats_szn_bases",
                 "rolling_1pitch_bases", "rolling_3pitch_bases", "rolling_10pitch_bases",
                 "pitcher_strikes_spread", "pitcher_balls_spread", "pitcher_speed",
                 "pitcher_fast_spread", "pitcher_offspeed_spread", "pitcher_ab_count"]],
               how="left", left_on="id", right_on="pitcher_id")
pitchers.head(2)

Unnamed: 0,id,first_name,last_name,team_nickname,primary_position,pitcher_id,pitcher_hand,pitcher_previous_stats_szn,rolling_1pitch,rolling_3pitch,rolling_10pitch,pitcher_previous_stats_szn_bases,rolling_1pitch_bases,rolling_3pitch_bases,rolling_10pitch_bases,pitcher_strikes_spread,pitcher_balls_spread,pitcher_speed,pitcher_fast_spread,pitcher_offspeed_spread,pitcher_ab_count
0,71791d18-9c59-4e2f-863d-007d3cdd7efd,Luis,Castillo,Mariners,SP,71791d18-9c59-4e2f-863d-007d3cdd7efd,R,0.264241,1.0,0.666667,0.3,0.428797,1.0,0.666667,0.4,0.661392,0.338608,92.928051,0.582278,0.417722,611.0
1,d3a08e67-05e9-4f54-b7b1-807c8e9beaf2,Logan,Gilbert,Mariners,SP,d3a08e67-05e9-4f54-b7b1-807c8e9beaf2,R,0.270134,0.0,0.333333,0.4,0.424497,0.0,0.666667,0.6,0.694631,0.305369,90.218487,0.692953,0.307047,590.0


In [7]:
# Save pitcher data set
pitchers = pitchers.dropna().drop(columns="pitcher_id")
pitchers.dropna().to_csv("../mlb/interface/data/pitchers.csv")

## Hitter data set

In [8]:
hitters = players[players.position != "P"][["id", "first_name", "last_name", "team_nickname", "primary_position"]]
hitters.head(2)

Unnamed: 0,id,first_name,last_name,team_nickname,primary_position
0,5a5c2129-3f87-43ee-8f6b-a2b6ca91bab4,Matthew,Marlowe,Mariners,LF
1,c59cd03a-ead5-41d4-9b4a-d26f0dd04ea6,Dominic,Canzone,Mariners,LF


In [9]:
hitters = hitters.merge(data.drop_duplicates(subset='hitter_id', keep='first')
                        [["hitter_id", "hitter_hand", "hitter_position", "hitter_previous_stats_szn",
                          "rolling_1ab", "rolling_3ab", "rolling_10ab", "hitter_previous_stats_szn_slug",
                          "rolling_1ab_slug", "rolling_3ab_slug", "rolling_10ab_slug",
                          "hitter_strikes_eff", "hitter_balls_eff", "hitter_success_speed", "hitter_fast_eff",
                          "hitter_offspeed_eff", "hitter_ab_count"]],
                        how="left", left_on="id", right_on="hitter_id")
hitters.head(2)

Unnamed: 0,id,first_name,last_name,team_nickname,primary_position,hitter_id,hitter_hand,hitter_position,hitter_previous_stats_szn,rolling_1ab,rolling_3ab,rolling_10ab,hitter_previous_stats_szn_slug,rolling_1ab_slug,rolling_3ab_slug,rolling_10ab_slug,hitter_strikes_eff,hitter_balls_eff,hitter_success_speed,hitter_fast_eff,hitter_offspeed_eff,hitter_ab_count
0,5a5c2129-3f87-43ee-8f6b-a2b6ca91bab4,Matthew,Marlowe,Mariners,LF,5a5c2129-3f87-43ee-8f6b-a2b6ca91bab4,L,OF,0.348837,0.0,0.0,0.2,0.5,0.0,0.0,0.2,0.309091,0.419355,90.43,0.375,0.315789,84.0
1,c59cd03a-ead5-41d4-9b4a-d26f0dd04ea6,Dominic,Canzone,Mariners,LF,c59cd03a-ead5-41d4-9b4a-d26f0dd04ea6,L,OF,0.302083,0.0,0.333333,0.3,0.458333,0.0,0.333333,0.6,0.287879,0.333333,91.672414,0.395833,0.208333,94.0


In [10]:
# Save hitter data set
hitters = hitters.dropna().drop(columns="hitter_id")
hitters.dropna().to_csv("../mlb/interface/data/hitters.csv")

## Build X_new

In [11]:
X_new = pd.concat([hitters[hitters.id == "5a5c2129-3f87-43ee-8f6b-a2b6ca91bab4"].reset_index(),
           pitchers[pitchers.id == "d3a08e67-05e9-4f54-b7b1-807c8e9beaf2"].reset_index()], axis=1)
X_new

Unnamed: 0,index,id,first_name,last_name,team_nickname,primary_position,hitter_hand,hitter_position,hitter_previous_stats_szn,rolling_1ab,rolling_3ab,rolling_10ab,hitter_previous_stats_szn_slug,rolling_1ab_slug,rolling_3ab_slug,rolling_10ab_slug,hitter_strikes_eff,hitter_balls_eff,hitter_success_speed,hitter_fast_eff,hitter_offspeed_eff,hitter_ab_count,index.1,id.1,first_name.1,last_name.1,team_nickname.1,primary_position.1,pitcher_hand,pitcher_previous_stats_szn,rolling_1pitch,rolling_3pitch,rolling_10pitch,pitcher_previous_stats_szn_bases,rolling_1pitch_bases,rolling_3pitch_bases,rolling_10pitch_bases,pitcher_strikes_spread,pitcher_balls_spread,pitcher_speed,pitcher_fast_spread,pitcher_offspeed_spread,pitcher_ab_count
0,0,5a5c2129-3f87-43ee-8f6b-a2b6ca91bab4,Matthew,Marlowe,Mariners,LF,L,OF,0.348837,0.0,0.0,0.2,0.5,0.0,0.0,0.2,0.309091,0.419355,90.43,0.375,0.315789,84.0,1,d3a08e67-05e9-4f54-b7b1-807c8e9beaf2,Logan,Gilbert,Mariners,SP,R,0.270134,0.0,0.333333,0.4,0.424497,0.0,0.666667,0.6,0.694631,0.305369,90.218487,0.692953,0.307047,590.0


In [12]:
# Calculate handed_matchup
X_new["handed_matchup"] = X_new.apply((lambda row: 0 if row["hitter_hand"] == row["pitcher_hand"] else 1), axis=1)
X_new

# Calculate match_up_ab_count_delta
X_new["match_up_ab_count_delta"] = X_new["pitcher_ab_count"] - X_new["hitter_ab_count"]

In [13]:
# Remove columns
X_new = X_new.drop(columns=["id", "first_name", "last_name", "team_nickname", "primary_position",
                            "hitter_hand", "pitcher_hand", "pitcher_ab_count", "hitter_ab_count"])
X_new

Unnamed: 0,index,hitter_position,hitter_previous_stats_szn,rolling_1ab,rolling_3ab,rolling_10ab,hitter_previous_stats_szn_slug,rolling_1ab_slug,rolling_3ab_slug,rolling_10ab_slug,hitter_strikes_eff,hitter_balls_eff,hitter_success_speed,hitter_fast_eff,hitter_offspeed_eff,index.1,pitcher_previous_stats_szn,rolling_1pitch,rolling_3pitch,rolling_10pitch,pitcher_previous_stats_szn_bases,rolling_1pitch_bases,rolling_3pitch_bases,rolling_10pitch_bases,pitcher_strikes_spread,pitcher_balls_spread,pitcher_speed,pitcher_fast_spread,pitcher_offspeed_spread,handed_matchup,match_up_ab_count_delta
0,0,OF,0.348837,0.0,0.0,0.2,0.5,0.0,0.0,0.2,0.309091,0.419355,90.43,0.375,0.315789,1,0.270134,0.0,0.333333,0.4,0.424497,0.0,0.666667,0.6,0.694631,0.305369,90.218487,0.692953,0.307047,1,506.0


In [14]:
X_new.shape

(1, 31)

In [15]:
X_new

Unnamed: 0,index,hitter_position,hitter_previous_stats_szn,rolling_1ab,rolling_3ab,rolling_10ab,hitter_previous_stats_szn_slug,rolling_1ab_slug,rolling_3ab_slug,rolling_10ab_slug,hitter_strikes_eff,hitter_balls_eff,hitter_success_speed,hitter_fast_eff,hitter_offspeed_eff,index.1,pitcher_previous_stats_szn,rolling_1pitch,rolling_3pitch,rolling_10pitch,pitcher_previous_stats_szn_bases,rolling_1pitch_bases,rolling_3pitch_bases,rolling_10pitch_bases,pitcher_strikes_spread,pitcher_balls_spread,pitcher_speed,pitcher_fast_spread,pitcher_offspeed_spread,handed_matchup,match_up_ab_count_delta
0,0,OF,0.348837,0.0,0.0,0.2,0.5,0.0,0.0,0.2,0.309091,0.419355,90.43,0.375,0.315789,1,0.270134,0.0,0.333333,0.4,0.424497,0.0,0.666667,0.6,0.694631,0.305369,90.218487,0.692953,0.307047,1,506.0


In [16]:
data1 = {'Nom': ['Alice', 'Bob'],
         'Âge': [25, 30]}

data2 = {'Nom': ['Charlie', 'David'],
         'Âge': [35, 40]}

df1 = pd.DataFrame(data1)
df2 = pd.DataFrame(data2)

# Concaténez les deux lignes en un nouveau DataFrame
nouveau_df = pd.concat([df1.iloc[0], df2.iloc[0]])

# Affichez le nouveau DataFrame
print(nouveau_df)

Nom      Alice
Âge         25
Nom    Charlie
Âge         35
Name: 0, dtype: object


In [17]:
hitters[hitters.first_name=="Carlos"]

Unnamed: 0,id,first_name,last_name,team_nickname,primary_position,hitter_hand,hitter_position,hitter_previous_stats_szn,rolling_1ab,rolling_3ab,rolling_10ab,hitter_previous_stats_szn_slug,rolling_1ab_slug,rolling_3ab_slug,rolling_10ab_slug,hitter_strikes_eff,hitter_balls_eff,hitter_success_speed,hitter_fast_eff,hitter_offspeed_eff,hitter_ab_count
50,6049097c-f59f-48d4-ae57-38f6027235a3,Carlos,Pérez,Athletics,C,R,C,0.288462,0.0,0.0,0.2,0.410256,0.0,0.0,0.2,0.237113,0.372881,89.56,0.244444,0.348485,151.0
151,3259a7ad-0490-4912-bd86-d06bf3ac91e0,Carlos,Correa,Twins,SS,R,IF,0.305439,0.0,0.333333,0.4,0.460251,0.0,0.333333,0.4,0.28013,0.350877,89.85274,0.289855,0.326733,469.0
179,725c41a3-311d-43ee-a2f9-586fd4afa383,Carlos,Pérez,White Sox,C,R,C,0.259259,0.0,0.333333,0.4,0.333333,0.0,0.333333,0.5,0.25,0.285714,92.271429,0.214286,0.307692,22.0
243,86167542-e9cf-4d20-8022-7ed53dc09d0a,Carlos,Santana,Brewers,1B,L,IF,0.302277,1.0,0.333333,0.3,0.467909,1.0,0.333333,0.4,0.24924,0.415584,90.037671,0.297872,0.308458,476.0


In [None]:
test = hitters
test.head