# MLB Data Retrieval

This notebook retrieves all of the 2018 postseason MLB games from the Sportradar API. For each game, it looks up the home plate umpire and collects every pitch that was called a ball or a strike. The data is then saved to a CSV file so it can be analyzed with pandas and matplotlib.

In [23]:
# Dependencies and Setup
import requests
import csv
import time

# Import API key
from config import api_key

# Incorporated postseason_games to determine the postseason games for the 2018 season
from postseason_games import games
from get_data import get_umpire, get_game_by_id

In [92]:

# Schedule endpoint for the 2018 postseason (PST); the API key travels as a query parameter,
# so the request goes over HTTPS to keep the key out of plaintext traffic.
url = f'https://api.sportradar.us/mlb/trial/v6.5/en/games/2018/PST/schedule.json?api_key={api_key}'

# Fail fast on a hung connection or an HTTP error status instead of parsing an
# error body as if it were the schedule.
schedule_response = requests.get(url, timeout=30)
schedule_response.raise_for_status()
resp = schedule_response.json()

In [93]:
# Collect the id of every game listed in the schedule response, in response order
games = []
for scheduled_game in resp['games']:
    games.append(scheduled_game['id'])

In [98]:
# Probe each game on Sportradar and keep the ids of the games whose play-by-play
# feed carries pitch location data

pitch_data_games = list()

i = 0  # number of games kept so far
k = 0  # number of games probed so far (stays 0 if `games` is empty)

# For each game id, download the play-by-play JSON and inspect the first event of the
# first at-bat in inning index 1. If that event's 'pitcher' entry has a 'pitch_x' key,
# the game id is appended to pitch_data_games. Progress prints as "<kept>/<probed>".

for k, game in enumerate(games, start=1):
    try:
        time.sleep(1)  # pause between requests (presumably for the trial-key rate limit)
        game_url = f'https://api.sportradar.us/mlb/trial/v6.5/en/games/{game}/pbp.json?api_key={api_key}'
        response = requests.get(game_url, timeout=30)
        # surface HTTP errors by status instead of as an obscure missing-key error below
        response.raise_for_status()
        game_resp = response.json()
        first_half = game_resp['game']['innings'][1]['halfs'][0]
        first_pitch = first_half['events'][0]['at_bat']['events'][0]
        if 'pitch_x' in first_pitch['pitcher']:
            pitch_data_games.append(game)
            i += 1
        print(f'{i}/{k}')
    except (requests.RequestException, ValueError, LookupError, TypeError) as x:
        # Best effort: a failed request, an undecodable body, or a feed without the expected
        # innings/events structure (e.g. "list index out of range") just skips this game.
        print(x)
        print(f'{i}/{k}')

# pitch_data_games saved to postseason_games.py to save api calls

1/1
list index out of range
1/2
2/3
3/4
4/5
5/6
6/7
list index out of range
6/8
7/9
list index out of range
7/10
8/11
9/12
10/13
list index out of range
10/14
11/15
12/16
13/17
list index out of range
13/18
14/19
15/20
16/21
17/22
18/23
19/24
20/25
21/26
22/27
23/28
list index out of range
23/29
24/30
list index out of range
24/31
25/32
26/33
list index out of range
26/34
list index out of range
26/35
27/36
28/37
list index out of range
28/38
29/39
30/40
31/41
32/42
33/43


['000023f8-e153-4dcd-9ba0-627ecc9f162e',
 '0a408f5b-3ea6-4500-84d4-d01a56b3f3c2',
 '0af06a7c-55e9-49e2-bf18-ba91e87f112f',
 '0c29784b-96cf-478c-b2b4-5dc59c4dac2f',
 '0dc994a5-3fe9-419d-a6dd-0727c01c38ef',
 '2a747bb0-2213-4db6-8173-14a8a1060c36',
 '37d6d617-2b4a-4c81-b257-504bbf6793ed',
 '3caab0f7-253b-4bde-bf45-9e0838fc2118',
 '472ad042-b1e0-4a15-93ee-34297fcfeb2d',
 '4aaf2a8a-cab1-4085-b011-fe555e723193',
 '560f46cd-679d-4bf6-93bb-74b322d33c17',
 '5b58eaf5-792a-4c9f-9c83-39094d8b566e',
 '5d1f85bd-4f87-47cc-9975-52dedd2302cc',
 '6721b2fd-829c-472a-bb23-026068987859',
 '6b10edd9-8a3f-4e5e-bcc5-e65de6c68cff',
 '6b7e96c5-b7ee-449d-8756-cbb25b2ce636',
 '7a524055-608d-45f4-9219-2e6b3ad47a0b',
 '838f51b1-8fa9-4742-ad1c-db05ef23f9a5',
 '84d375dc-ec1e-4bb5-9260-c018c820dad4',
 '86a0958a-2b1b-4ba3-990d-45f3c6214611',
 '8d55209d-2259-4463-9b90-cb17e91bc50f',
 '8e4cb875-0d9a-417c-b686-86bbc99f6750',
 '93fb087a-6881-4811-9fdf-e16159bb7350',
 'a2117ef8-5f78-4c49-983a-8f86aa584ef1',
 'cee54eb0-bd41-

In [31]:
# pitches that are called ball or strike

# set up the outcome filter and the CSV header names
# ('bB' is written out as 'Ball'; every other id in this list is written out as 'Strike')
outcome_ids = ['kKL', 'bB', 'aKLAD1', 'aKLAD2', 'aKLAD3', 'aKLAD4', 'oKLT1', 'oKLT2', 'oKLT3', 'oKLT4']
csv_header = ['Umpire', 'Game_ID', 'Home/Away', 'Called Ball/Strike', 'Pitch_X', 'Pitch_Y', 'Pitcher_Hand',
              'Batter_Hand', 'Pitch_Type', 'Pitch_Speed']

# Nested loops walk game -> inning -> half -> event -> pitch and write one CSV row
# [umpire, game, t_b, call, pitch_x, pitch_y, pitcher_hand, batter_hand, pitch_type, pitch_speed]
# for every pitch whose outcome_id is listed in outcome_ids.

# explicit encoding so the file is written the same way on every platform
with open('Resources/pitch_data.csv', 'w', newline='', encoding='utf-8') as fout:
    csvwriter = csv.writer(fout, delimiter=',')
    csvwriter.writerow(csv_header)
    for game in games:
        print(game)
        # pause before each helper call (presumably each one hits the rate-limited API)
        time.sleep(1.1)
        game_json = get_game_by_id(game, api_key)
        time.sleep(1.1)
        umpire = get_umpire(game, api_key)
        # innings[0] is deliberately skipped (presumably a pre-game entry - TODO confirm)
        for inning in game_json['game']['innings'][1:]:
            for half in inning['halfs']:
                # 'T' is mapped to 'Away'; any other half value is mapped to 'Home'
                t_b = 'Away' if half['half'] == 'T' else 'Home'
                for event in half['events']:
                    # events without an at-bat have nothing to record
                    at_bat = event.get('at_bat')
                    if at_bat is None:
                        continue
                    for pitch in at_bat.get('events', []):
                        # An at-bat event without an outcome_id is skipped on its own,
                        # rather than discarding the remainder of the at-bat.
                        outcome_id = pitch.get('outcome_id')
                        if outcome_id not in outcome_ids:
                            continue
                        call = 'Ball' if outcome_id == 'bB' else 'Strike'
                        try:
                            pitcher = pitch['pitcher']
                            row = [umpire, game, t_b, call,
                                   pitcher['pitch_x'], pitcher['pitch_y'],
                                   pitcher['pitcher_hand'], pitcher['hitter_hand'],
                                   pitcher['pitch_type'], pitcher['pitch_speed']]
                        except KeyError:
                            # a pitch missing any tracked field cannot form a complete row
                            continue
                        # written outside the try so CSV/file errors are not silently swallowed
                        csvwriter.writerow(row)

print('done')

000023f8-e153-4dcd-9ba0-627ecc9f162e
0a408f5b-3ea6-4500-84d4-d01a56b3f3c2
0af06a7c-55e9-49e2-bf18-ba91e87f112f
0c29784b-96cf-478c-b2b4-5dc59c4dac2f
0dc994a5-3fe9-419d-a6dd-0727c01c38ef
2a747bb0-2213-4db6-8173-14a8a1060c36
37d6d617-2b4a-4c81-b257-504bbf6793ed
3caab0f7-253b-4bde-bf45-9e0838fc2118
472ad042-b1e0-4a15-93ee-34297fcfeb2d
4aaf2a8a-cab1-4085-b011-fe555e723193
560f46cd-679d-4bf6-93bb-74b322d33c17
5b58eaf5-792a-4c9f-9c83-39094d8b566e
5d1f85bd-4f87-47cc-9975-52dedd2302cc
6721b2fd-829c-472a-bb23-026068987859
6b10edd9-8a3f-4e5e-bcc5-e65de6c68cff
6b7e96c5-b7ee-449d-8756-cbb25b2ce636
7a524055-608d-45f4-9219-2e6b3ad47a0b
838f51b1-8fa9-4742-ad1c-db05ef23f9a5
84d375dc-ec1e-4bb5-9260-c018c820dad4
86a0958a-2b1b-4ba3-990d-45f3c6214611
8d55209d-2259-4463-9b90-cb17e91bc50f
8e4cb875-0d9a-417c-b686-86bbc99f6750
93fb087a-6881-4811-9fdf-e16159bb7350
a2117ef8-5f78-4c49-983a-8f86aa584ef1
cee54eb0-bd41-4183-9f68-bbbb0a68bbeb
d46f53fa-955b-4474-8728-7a60b114e2a8
dd0a0d8b-bf3b-4075-9bc3-99b207024401
e