# Merging tables to create the dataset for feature engineering

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

# Show every column when a DataFrame is displayed (no truncation of wide frames).
pd.options.display.max_columns = None

In [2]:
# Data import: load the four lookup tables and the main `data` table from CSV.
# The order of the paths matches the order of the names on the left-hand side.
games, players, stadiums, teams, data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../raw_data/games_w_venue.csv",
        "../raw_data/players.csv",
        "../raw_data/stadiums.csv",
        "../raw_data/teams.csv",
        "../raw_data/all_ab_raw_data_w_target.csv",
    )
)

In [3]:
# (rows, columns) of `data` as loaded, before any merge is applied.
data.shape

(142307, 25)

In [4]:
# Attach game-level columns to every row of `data`.
# The games table exposes its key as "id"; rename it so it matches `data.game_id`.
games = games.rename({"id": "game_id"}, axis="columns")
# Left join keeps every row of `data`, matched or not.
data = pd.merge(data, games, on="game_id", how="left")
data.shape

(142307, 44)

In [5]:
# Attach player attributes for the hitter of each row.
# Keep only the first row per player id, then prefix every column with
# "hitter_" so the key becomes "hitter_id".
hitters = players.drop_duplicates(subset="id", keep="first")
hitters = hitters.add_prefix("hitter_")
data = pd.merge(data, hitters, on="hitter_id", how="left")
data.shape

(142307, 50)

In [6]:
# Attach player attributes for the pitcher of each row.
# Keep only the first row per player id, then prefix every column with
# "pitcher_" so the key becomes "pitcher_id".
pitchers = players.drop_duplicates(subset="id", keep="first")
pitchers = pitchers.add_prefix("pitcher_")
data = pd.merge(data, pitchers, on="pitcher_id", how="left")
data.shape

(142307, 56)

In [7]:
# Attach team attributes for the home team.
# Prefixing turns the teams key "id" into "home_team_id"; the "home_team"
# column of `data` is renamed to the same name so the two can be joined.
home_team = teams.add_prefix("home_team_")
data = (
    data
    .rename(columns={"home_team": "home_team_id"})
    .merge(home_team, on="home_team_id", how="left")
)
data.shape

(142307, 59)

In [8]:
# Attach team attributes for the away team.
# Prefixing turns the teams key "id" into "away_team_id"; the "away_team"
# column of `data` is renamed to the same name so the two can be joined.
away_team = teams.add_prefix("away_team_")
data = (
    data
    .rename(columns={"away_team": "away_team_id"})
    .merge(away_team, on="away_team_id", how="left")
)
data.shape

(142307, 62)

In [9]:
# Attach attributes of the stadium where the game is played.
# Prefixing turns the stadiums key "id" into "stadium_id"; the "venue_id"
# column of `data` is renamed to the same name so the two can be joined.
venue = stadiums.add_prefix("stadium_")
data = (
    data
    .rename(columns={"venue_id": "stadium_id"})
    .merge(venue, on="stadium_id", how="left")
)
data.shape

(142307, 76)

In [10]:
# Merging data and away address: attach the stadium matched to the away
# team's abbreviation, with every column prefixed "away_stadium_".
#
# Work on a copy. `away_stadium = stadiums` would only alias the frame, so
# adding the "abbr" column below would also add it to `stadiums` itself and
# change the result of any cell that uses `stadiums` when it is re-run.
away_stadium = stadiums.copy()
# NOTE(review): assigning a Series aligns on index labels, so the stadium at
# index i receives the abbreviation of the team at index i. This assumes
# stadiums.csv and teams.csv list their rows in matching order - TODO confirm,
# or join through an explicit team/stadium key instead.
away_stadium["abbr"] = teams.abbr
away_stadium = away_stadium.add_prefix("away_stadium_")
# The prefixed abbreviation column is the join key; give it the name it has in `data`.
away_stadium = away_stadium.rename(columns={"away_stadium_abbr": "away_team_abbr"})
# Left join keeps every row of `data`; unmatched abbreviations yield NaN stadium columns.
data = data.merge(away_stadium, how="left", on="away_team_abbr")
data.shape

(142307, 91)

In [14]:
# Snapshot of the fully merged frame, written before any column is dropped.
# `data_all` is a second name for the same object as `data`, not a copy.
data_all = data
data_all.to_csv(path_or_buf="../raw_data/all_data_merged.csv", index=True)

In [12]:
# Preview the first three rows of `data`.
data.head(3)

Unnamed: 0.1,id,game_id,inning,side,hitter_id,hitter_hand,pitcher_id,pitcher_hand,description,temp_f,weather_condition,humidity,wind_speed_mph,at_bat_end_time,pitch_location_zone,pitch_type_code,pitch_type_des,pitch_speed_mph,pitch_count_at_bat,pitcher_pitch_count_at_bat_start,outs_at_start,output_code,play_outcome,mc_target,y_target,Unnamed: 0,status,coverage,game_number,day_night,scheduled,home_team_id,away_team_id,attendance,duration,double_header,entry_mode,reference,venue,home,away,broadcast,rescheduled,stadium_id,hitter_Unnamed: 0,hitter_player_name,hitter_team_id,hitter_team_name,hitter_position,hitter_primary_position,pitcher_Unnamed: 0,pitcher_player_name,pitcher_team_id,pitcher_team_name,pitcher_position,pitcher_primary_position,home_team_name,home_team_market,home_team_abbr,away_team_name,away_team_market,away_team_abbr,stadium_name,stadium_market,stadium_capacity,stadium_surface,stadium_address,stadium_city,stadium_state,stadium_zip,stadium_country,stadium_field_orientation,stadium_stadium_type,stadium_time_zone,stadium_lat,stadium_lon,away_stadium_id,away_stadium_name,away_stadium_market,away_stadium_capacity,away_stadium_surface,away_stadium_address,away_stadium_city,away_stadium_state,away_stadium_zip,away_stadium_country,away_stadium_field_orientation,away_stadium_stadium_type,away_stadium_time_zone,away_stadium_lat,away_stadium_lon
0,67cfd85d-029a-4e5d-9ad3-cf7c1f29c303,e60d1770-2789-4d00-b94a-5033a7fa89a9,1,T,084d2514-9ffb-414e-ae16-3bc690aaad51,R,e28344cd-b08e-43ec-adcb-f6693203cf82,R,Mookie Betts flies out to deep left field to C...,51.0,"Light Rain, Mist",96.0,4.0,2023-05-08T23:43:05+00:00,2.0,FF,Four-Seam Fastball,96.5,4.0,0.0,0.0,oFO,IPO,0,0,1700,closed,full,1,N,2023-05-08T23:40:00+00:00,dcfd5266-00ce-442c-bc09-264cd20cf455,ef64da7f-cfaf-4300-87b0-9313386b977c,22847.0,2:18,False,STOMP,718257,"{'name': 'American Family Field', 'market': 'M...","{'name': 'Brewers', 'market': 'Milwaukee', 'ab...","{'name': 'Dodgers', 'market': 'Los Angeles', '...",{'network': 'MLB Network'},,3d13c8a7-283f-482b-ade1-441e25b6465d,432,"Betts, Markus",ef64da7f-cfaf-4300-87b0-9313386b977c,Dodgers,OF,RF,916,"Peralta, Freddy",dcfd5266-00ce-442c-bc09-264cd20cf455,Brewers,P,SP,Brewers,Milwaukee,MIL,Dodgers,Los Angeles,LAD,American Family Field,Milwaukee,41700.0,grass,One Brewers Way,Milwaukee,WI,53214,USA,SE,retractable,US/Central,43.028739,-87.971303,66a19c3d-24fe-477d-bee7-c6ef1b98352f,Dodger Stadium,Los Angeles,56000.0,grass,1000 Vin Scully Avenue,Los Angeles,CA,90012,USA,NE,outdoor,US/Pacific,34.074541,-118.240888
1,1ec4235d-7ee4-47f7-a262-40ef0a76acd2,e60d1770-2789-4d00-b94a-5033a7fa89a9,1,T,ea1a2111-44cc-4996-babb-9439465e6760,L,e28344cd-b08e-43ec-adcb-f6693203cf82,R,Freddie Freeman lines out to right field to Ty...,51.0,"Light Rain, Mist",96.0,4.0,2023-05-08T23:45:01+00:00,13.0,CU,Curveball,76.3,5.0,4.0,1.0,oLO,IPO,0,0,1700,closed,full,1,N,2023-05-08T23:40:00+00:00,dcfd5266-00ce-442c-bc09-264cd20cf455,ef64da7f-cfaf-4300-87b0-9313386b977c,22847.0,2:18,False,STOMP,718257,"{'name': 'American Family Field', 'market': 'M...","{'name': 'Brewers', 'market': 'Milwaukee', 'ab...","{'name': 'Dodgers', 'market': 'Los Angeles', '...",{'network': 'MLB Network'},,3d13c8a7-283f-482b-ade1-441e25b6465d,427,"Freeman, Frederick",ef64da7f-cfaf-4300-87b0-9313386b977c,Dodgers,IF,1B,916,"Peralta, Freddy",dcfd5266-00ce-442c-bc09-264cd20cf455,Brewers,P,SP,Brewers,Milwaukee,MIL,Dodgers,Los Angeles,LAD,American Family Field,Milwaukee,41700.0,grass,One Brewers Way,Milwaukee,WI,53214,USA,SE,retractable,US/Central,43.028739,-87.971303,66a19c3d-24fe-477d-bee7-c6ef1b98352f,Dodger Stadium,Los Angeles,56000.0,grass,1000 Vin Scully Avenue,Los Angeles,CA,90012,USA,NE,outdoor,US/Pacific,34.074541,-118.240888
2,d5d817fa-a490-45b3-9884-0e6a70d97e3e,e60d1770-2789-4d00-b94a-5033a7fa89a9,1,T,8e42fd09-b9d0-4566-b960-e107f580de46,L,e28344cd-b08e-43ec-adcb-f6693203cf82,R,"Jason Heyward grounds out to second base, Bric...",51.0,"Light Rain, Mist",96.0,4.0,2023-05-08T23:45:36+00:00,9.0,FF,Four-Seam Fastball,94.8,1.0,9.0,2.0,oGO,IPO,0,0,1700,closed,full,1,N,2023-05-08T23:40:00+00:00,dcfd5266-00ce-442c-bc09-264cd20cf455,ef64da7f-cfaf-4300-87b0-9313386b977c,22847.0,2:18,False,STOMP,718257,"{'name': 'American Family Field', 'market': 'M...","{'name': 'Brewers', 'market': 'Milwaukee', 'ab...","{'name': 'Dodgers', 'market': 'Los Angeles', '...",{'network': 'MLB Network'},,3d13c8a7-283f-482b-ade1-441e25b6465d,426,"Heyward, Jason",ef64da7f-cfaf-4300-87b0-9313386b977c,Dodgers,OF,RF,916,"Peralta, Freddy",dcfd5266-00ce-442c-bc09-264cd20cf455,Brewers,P,SP,Brewers,Milwaukee,MIL,Dodgers,Los Angeles,LAD,American Family Field,Milwaukee,41700.0,grass,One Brewers Way,Milwaukee,WI,53214,USA,SE,retractable,US/Central,43.028739,-87.971303,66a19c3d-24fe-477d-bee7-c6ef1b98352f,Dodger Stadium,Los Angeles,56000.0,grass,1000 Vin Scully Avenue,Los Angeles,CA,90012,USA,NE,outdoor,US/Pacific,34.074541,-118.240888


In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
data.describe()

Unnamed: 0.1,inning,temp_f,humidity,wind_speed_mph,pitch_location_zone,pitch_speed_mph,pitch_count_at_bat,pitcher_pitch_count_at_bat_start,outs_at_start,mc_target,y_target,Unnamed: 0,game_number,attendance,reference,hitter_Unnamed: 0,pitcher_Unnamed: 0,stadium_capacity,stadium_lat,stadium_lon,away_stadium_capacity,away_stadium_lat,away_stadium_lon
count,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,141860.0,142307.0,142307.0,142307.0,141918.0,141918.0,141918.0,142232.0,142232.0,142232.0
mean,4.950733,73.288419,54.083784,11.195423,7.856016,89.056774,3.896288,29.474291,0.966249,0.464938,0.317061,945.019381,1.014764,29309.14687,717835.681878,731.744784,739.614987,42386.568969,38.125192,-92.608178,42388.579989,38.224083,-92.39908
std,2.579747,12.714632,19.634731,221.839228,4.076619,6.106812,1.892109,27.078681,0.817366,0.861797,0.465333,546.193593,0.120607,11309.926713,546.487728,434.284794,437.164725,5477.181213,5.018912,16.401666,5487.732362,5.033276,16.41636
min,1.0,34.0,4.0,1.0,1.0,33.7,1.0,-6.0,0.0,0.0,0.0,0.0,1.0,0.0,716887.0,1.0,0.0,25025.0,25.778057,-122.390621,25025.0,25.778057,-122.390621
25%,3.0,65.0,40.0,4.0,5.0,84.6,2.0,7.0,0.0,0.0,0.0,474.0,1.0,20286.0,717364.0,370.0,360.0,40000.0,33.890672,-104.993349,40000.0,33.890672,-104.993349
50%,5.0,74.0,56.0,8.0,8.0,89.8,4.0,20.0,1.0,0.0,0.0,943.0,1.0,30578.0,717834.0,730.0,715.0,41376.0,39.097736,-87.656054,41700.0,39.283787,-87.634833
75%,7.0,82.0,69.0,12.0,12.0,93.9,5.0,50.0,2.0,1.0,1.0,1416.0,1.0,38617.0,718308.0,1110.0,1100.0,45971.0,41.830066,-80.006409,45971.0,41.830066,-80.006409
max,14.0,117.0,100.0,22369.0,14.0,104.8,16.0,117.0,3.0,4.0,1.0,2430.0,2.0,55565.0,718782.0,1498.0,1513.0,56000.0,47.589904,-71.098782,56000.0,47.589904,-71.098782


In [16]:
# Number of rows that exactly repeat an earlier row across all columns.
data.duplicated().sum()

0

In [23]:
# Columns removed in the first drop pass. Order is kept as originally written.
# NOTE(review): 'hitter_Unnamed: 0' and 'pitcher_Unnamed: 0' are not listed
# here, so they remain in `data` after this drop - confirm that is intended.
columns_to_remove_fp = [
    'description',
    'play_outcome',
    'mc_target',
    # columns that came in with the games merge
    'Unnamed: 0',
    'status',
    'coverage',
    'game_number',
    'duration',
    'double_header',
    'entry_mode',
    'reference',
    'venue',
    'home',
    'away',
    'broadcast',
    'rescheduled',
    # hitter / pitcher columns
    'hitter_team_id',
    'hitter_team_name',
    'pitcher_position',
    'pitcher_team_id',
    'pitcher_team_name',
    # home / away team columns
    'home_team_name',
    'home_team_market',
    'home_team_abbr',
    'away_team_name',
    'away_team_market',
    'away_team_abbr',
    # stadium columns
    'stadium_name',
    'stadium_market',
    'stadium_surface',
    'stadium_address',
    'stadium_city',
    'stadium_state',
    'stadium_zip',
    'stadium_country',
    'stadium_field_orientation',
    'stadium_time_zone',
    # away stadium columns
    'away_stadium_id',
    'away_stadium_name',
    'away_stadium_market',
    'away_stadium_surface',
    'away_stadium_address',
    'away_stadium_city',
    'away_stadium_state',
    'away_stadium_zip',
    'away_stadium_country',
    'away_stadium_field_orientation',
    'away_stadium_stadium_type',
    'away_stadium_time_zone',
    'pitch_type_des',
]

In [24]:
# First drop pass: remove every column named in `columns_to_remove_fp`.
data = data.drop(columns_to_remove_fp, axis="columns")

In [25]:
# Cleaning up data points.
# Vectorized Series methods replace the row-by-row `.apply(lambda ...)` calls;
# the resulting values are the same and NaN entries are left untouched.

# A recorded value of 3 outs at the start of an at-bat is rewritten to 2.
data['outs_at_start'] = data['outs_at_start'].replace(3, 2)
# Negative pitch counts are floored at 0.
data['pitcher_pitch_count_at_bat_start'] = data['pitcher_pitch_count_at_bat_start'].clip(lower=0)
# Wind speeds above 50 mph are capped at 50.
data['wind_speed_mph'] = data['wind_speed_mph'].clip(upper=50)

In [26]:
# Summary statistics of the numeric columns of the current `data`.
data.describe()

Unnamed: 0,inning,temp_f,humidity,wind_speed_mph,pitch_location_zone,pitch_speed_mph,pitch_count_at_bat,pitcher_pitch_count_at_bat_start,outs_at_start,y_target,attendance,hitter_Unnamed: 0,pitcher_Unnamed: 0,stadium_capacity,stadium_lat,stadium_lon,away_stadium_capacity,away_stadium_lat,away_stadium_lon
count,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,142307.0,141860.0,142307.0,142307.0,141918.0,141918.0,141918.0,142232.0,142232.0,142232.0
mean,4.950733,73.288419,54.083784,8.972419,7.856016,89.056774,3.896288,29.476069,0.966221,0.317061,29309.14687,731.744784,739.614987,42386.568969,38.125192,-92.608178,42388.579989,38.224083,-92.39908
std,2.579747,12.714632,19.634731,5.069735,4.076619,6.106812,1.892109,27.076677,0.817314,0.465333,11309.926713,434.284794,437.164725,5477.181213,5.018912,16.401666,5487.732362,5.033276,16.41636
min,1.0,34.0,4.0,1.0,1.0,33.7,1.0,0.0,0.0,0.0,0.0,1.0,0.0,25025.0,25.778057,-122.390621,25025.0,25.778057,-122.390621
25%,3.0,65.0,40.0,4.0,5.0,84.6,2.0,7.0,0.0,0.0,20286.0,370.0,360.0,40000.0,33.890672,-104.993349,40000.0,33.890672,-104.993349
50%,5.0,74.0,56.0,8.0,8.0,89.8,4.0,20.0,1.0,0.0,30578.0,730.0,715.0,41376.0,39.097736,-87.656054,41700.0,39.283787,-87.634833
75%,7.0,82.0,69.0,12.0,12.0,93.9,5.0,50.0,2.0,1.0,38617.0,1110.0,1100.0,45971.0,41.830066,-80.006409,45971.0,41.830066,-80.006409
max,14.0,117.0,100.0,50.0,14.0,104.8,16.0,117.0,2.0,1.0,55565.0,1498.0,1513.0,56000.0,47.589904,-71.098782,56000.0,47.589904,-71.098782


In [27]:
# Preview the first two rows of `data`.
data.head(2)

Unnamed: 0,id,game_id,inning,side,hitter_id,hitter_hand,pitcher_id,pitcher_hand,temp_f,weather_condition,humidity,wind_speed_mph,at_bat_end_time,pitch_location_zone,pitch_type_code,pitch_speed_mph,pitch_count_at_bat,pitcher_pitch_count_at_bat_start,outs_at_start,output_code,y_target,day_night,scheduled,home_team_id,away_team_id,attendance,stadium_id,hitter_Unnamed: 0,hitter_player_name,hitter_position,hitter_primary_position,pitcher_Unnamed: 0,pitcher_player_name,pitcher_primary_position,stadium_capacity,stadium_stadium_type,stadium_lat,stadium_lon,away_stadium_capacity,away_stadium_lat,away_stadium_lon
0,67cfd85d-029a-4e5d-9ad3-cf7c1f29c303,e60d1770-2789-4d00-b94a-5033a7fa89a9,1,T,084d2514-9ffb-414e-ae16-3bc690aaad51,R,e28344cd-b08e-43ec-adcb-f6693203cf82,R,51.0,"Light Rain, Mist",96.0,4.0,2023-05-08T23:43:05+00:00,2.0,FF,96.5,4.0,0.0,0.0,oFO,0,N,2023-05-08T23:40:00+00:00,dcfd5266-00ce-442c-bc09-264cd20cf455,ef64da7f-cfaf-4300-87b0-9313386b977c,22847.0,3d13c8a7-283f-482b-ade1-441e25b6465d,432,"Betts, Markus",OF,RF,916,"Peralta, Freddy",SP,41700.0,retractable,43.028739,-87.971303,56000.0,34.074541,-118.240888
1,1ec4235d-7ee4-47f7-a262-40ef0a76acd2,e60d1770-2789-4d00-b94a-5033a7fa89a9,1,T,ea1a2111-44cc-4996-babb-9439465e6760,L,e28344cd-b08e-43ec-adcb-f6693203cf82,R,51.0,"Light Rain, Mist",96.0,4.0,2023-05-08T23:45:01+00:00,13.0,CU,76.3,5.0,4.0,1.0,oLO,0,N,2023-05-08T23:40:00+00:00,dcfd5266-00ce-442c-bc09-264cd20cf455,ef64da7f-cfaf-4300-87b0-9313386b977c,22847.0,3d13c8a7-283f-482b-ade1-441e25b6465d,427,"Freeman, Frederick",IF,1B,916,"Peralta, Freddy",SP,41700.0,retractable,43.028739,-87.971303,56000.0,34.074541,-118.240888


In [28]:
# Converting columns to the correct dtype: parse both timestamp columns
# from strings into pandas datetimes.
for timestamp_col in ("scheduled", "at_bat_end_time"):
    data[timestamp_col] = pd.to_datetime(data[timestamp_col])

In [29]:
# Preview the first two rows of `data` to check the parsed timestamp columns.
data.head(2)

Unnamed: 0,id,game_id,inning,side,hitter_id,hitter_hand,pitcher_id,pitcher_hand,temp_f,weather_condition,humidity,wind_speed_mph,at_bat_end_time,pitch_location_zone,pitch_type_code,pitch_speed_mph,pitch_count_at_bat,pitcher_pitch_count_at_bat_start,outs_at_start,output_code,y_target,day_night,scheduled,home_team_id,away_team_id,attendance,stadium_id,hitter_Unnamed: 0,hitter_player_name,hitter_position,hitter_primary_position,pitcher_Unnamed: 0,pitcher_player_name,pitcher_primary_position,stadium_capacity,stadium_stadium_type,stadium_lat,stadium_lon,away_stadium_capacity,away_stadium_lat,away_stadium_lon
0,67cfd85d-029a-4e5d-9ad3-cf7c1f29c303,e60d1770-2789-4d00-b94a-5033a7fa89a9,1,T,084d2514-9ffb-414e-ae16-3bc690aaad51,R,e28344cd-b08e-43ec-adcb-f6693203cf82,R,51.0,"Light Rain, Mist",96.0,4.0,2023-05-08 23:43:05+00:00,2.0,FF,96.5,4.0,0.0,0.0,oFO,0,N,2023-05-08 23:40:00+00:00,dcfd5266-00ce-442c-bc09-264cd20cf455,ef64da7f-cfaf-4300-87b0-9313386b977c,22847.0,3d13c8a7-283f-482b-ade1-441e25b6465d,432,"Betts, Markus",OF,RF,916,"Peralta, Freddy",SP,41700.0,retractable,43.028739,-87.971303,56000.0,34.074541,-118.240888
1,1ec4235d-7ee4-47f7-a262-40ef0a76acd2,e60d1770-2789-4d00-b94a-5033a7fa89a9,1,T,ea1a2111-44cc-4996-babb-9439465e6760,L,e28344cd-b08e-43ec-adcb-f6693203cf82,R,51.0,"Light Rain, Mist",96.0,4.0,2023-05-08 23:45:01+00:00,13.0,CU,76.3,5.0,4.0,1.0,oLO,0,N,2023-05-08 23:40:00+00:00,dcfd5266-00ce-442c-bc09-264cd20cf455,ef64da7f-cfaf-4300-87b0-9313386b977c,22847.0,3d13c8a7-283f-482b-ade1-441e25b6465d,427,"Freeman, Frederick",IF,1B,916,"Peralta, Freddy",SP,41700.0,retractable,43.028739,-87.971303,56000.0,34.074541,-118.240888


In [30]:
# Column names, non-null counts and dtypes of `data`.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142307 entries, 0 to 142306
Data columns (total 41 columns):
 #   Column                            Non-Null Count   Dtype              
---  ------                            --------------   -----              
 0   id                                142307 non-null  object             
 1   game_id                           142307 non-null  object             
 2   inning                            142307 non-null  int64              
 3   side                              142307 non-null  object             
 4   hitter_id                         142307 non-null  object             
 5   hitter_hand                       142307 non-null  object             
 6   pitcher_id                        142307 non-null  object             
 7   pitcher_hand                      142307 non-null  object             
 8   temp_f                            142307 non-null  float64            
 9   weather_condition                 142307 non-nul

In [31]:
# Order rows by at-bat end time, then inning, both descending (latest first).
# `ignore_index=True` replaces the old index with a fresh 0..n-1 range.
data = data.sort_values(
    by=["at_bat_end_time", "inning"],
    ascending=False,
    ignore_index=True,
)

In [32]:
# Preview the first two rows of `data` after sorting.
data.head(2)

Unnamed: 0,id,game_id,inning,side,hitter_id,hitter_hand,pitcher_id,pitcher_hand,temp_f,weather_condition,humidity,wind_speed_mph,at_bat_end_time,pitch_location_zone,pitch_type_code,pitch_speed_mph,pitch_count_at_bat,pitcher_pitch_count_at_bat_start,outs_at_start,output_code,y_target,day_night,scheduled,home_team_id,away_team_id,attendance,stadium_id,hitter_Unnamed: 0,hitter_player_name,hitter_position,hitter_primary_position,pitcher_Unnamed: 0,pitcher_player_name,pitcher_primary_position,stadium_capacity,stadium_stadium_type,stadium_lat,stadium_lon,away_stadium_capacity,away_stadium_lat,away_stadium_lon
0,5f644e80-7968-48de-9c55-37bdbf88b0df,b650873b-0a63-4fe2-902b-88e1c4b36ae3,9,T,a522f82a-e920-4ece-8e29-3f16630bf635,L,22dabc06-611e-4d36-81af-7153e7ba86e6,R,96.0,Partly cloudy,28.0,11.0,2023-08-23 04:31:54+00:00,2.0,FF,93.0,2.0,26.0,2.0,oGO,0,N,2023-08-23 01:40:00+00:00,25507be1-6a68-4267-bd82-e097d94b359b,d99f919b-1534-4516-8e8a-9cd106c6d8cd,19003.0,bf05de0d-7ced-4a19-8e17-2bbd985f8a92,972,"Jankowski, Travis",OF,LF,52,"Sewald, Paul",RP,48359.0,retractable,33.445302,-112.066687,40000.0,32.7473,-97.08182
1,69e5c3fb-e338-43c0-9cf4-78f0eefa7b02,b650873b-0a63-4fe2-902b-88e1c4b36ae3,9,T,a8efa694-2d64-4ad3-bf39-7d4f9006be7f,R,22dabc06-611e-4d36-81af-7153e7ba86e6,R,96.0,Partly cloudy,28.0,11.0,2023-08-23 04:30:56+00:00,14.0,ST,85.0,6.0,20.0,1.0,kKS,0,N,2023-08-23 01:40:00+00:00,25507be1-6a68-4267-bd82-e097d94b359b,d99f919b-1534-4516-8e8a-9cd106c6d8cd,19003.0,bf05de0d-7ced-4a19-8e17-2bbd985f8a92,968,"Garver, Mitchell",C,C,52,"Sewald, Paul",RP,48359.0,retractable,33.445302,-112.066687,40000.0,32.7473,-97.08182


In [33]:
# Columns removed in the second drop pass.
secondary_drop = [
    'scheduled',
    'away_stadium_capacity',
    'pitch_type_code',
    'output_code',
    'pitch_location_zone',
]

In [34]:
# Second drop pass: remove every column named in `secondary_drop`.
data = data.drop(secondary_drop, axis="columns")

In [35]:
# Column names, non-null counts and dtypes of `data` after the second drop.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142307 entries, 0 to 142306
Data columns (total 36 columns):
 #   Column                            Non-Null Count   Dtype              
---  ------                            --------------   -----              
 0   id                                142307 non-null  object             
 1   game_id                           142307 non-null  object             
 2   inning                            142307 non-null  int64              
 3   side                              142307 non-null  object             
 4   hitter_id                         142307 non-null  object             
 5   hitter_hand                       142307 non-null  object             
 6   pitcher_id                        142307 non-null  object             
 7   pitcher_hand                      142307 non-null  object             
 8   temp_f                            142307 non-null  float64            
 9   weather_condition                 142307 non-nul

In [36]:
# Write the cleaned, sorted frame to disk. The row index is written as the
# first CSV column (index=True).
data.to_csv(path_or_buf="../raw_data/final_raw_data.csv", index=True)