Skip to content

Commit

Permalink
Fixed issue with scraping shifts in second half of 2009 season
Browse files Browse the repository at this point in the history
  • Loading branch information
HarryShomer committed Feb 12, 2018
1 parent 781bbc3 commit 39181b6
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 4 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file modified hockey_scraper/.DS_Store
Binary file not shown.
8 changes: 6 additions & 2 deletions hockey_scraper/game_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,12 @@ def scrape_pbp(game_id, date, roster, game_json, players, teams):
else:
if_json = False

# Scrape the html and see what it returns
html_df = html_pbp.scrape_game(game_id, players, teams, if_json)
if html_df is None: # If None we couldn't get the html pbp
if html_df is None:
return None

# Check if the json is missing the plays...if it scrape ESPN for the coordinates
# Check if the json is missing the plays...if it is scrape ESPN for the coordinates
if not if_json:
espn_df = espn_pbp.scrape_game(date, teams['Home'], teams['Away'])
game_df = combine_espn_html_pbp(html_df, espn_df, str(game_id), date, teams['Away'], teams['Home'])
Expand Down Expand Up @@ -248,13 +249,16 @@ def scrape_shifts(game_id, players, date):
shifts_df = None

# Control for the fact that shift json is only available from 2010 onwards
# Note: This check doesn't work for the second half of the 2009 season. The workaround is that
# json_shifts.scrape_game returns None if the df is empty.
if int(date[:4]) >= 2010:
shifts_df = json_shifts.scrape_game(game_id)

if shifts_df is None:
shifts_df = html_shifts.scrape_game(game_id, players)

if shifts_df is None:
print("Unable scrape shifts for game", game_id)
broken_shifts_games.extend([[game_id, date]])
return None # Both failed so just return nothing

Expand Down
2 changes: 1 addition & 1 deletion hockey_scraper/json_shifts.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def scrape_game(game_id):
print('Error parsing Json shifts for game {}'.format(game_id), e)
return None

return game_df
return game_df if not game_df.empty else None



2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def read():

setup(
name='hockey_scraper',
version='1.2.1',
version='1.2.2',
description="""This package is designed to allow people to scrape Play by Play and Shift data off of the National
Hockey League (NHL) API and website for all preseason, regular season and playoff games since the
2007-2008 season""",
Expand Down

0 comments on commit 39181b6

Please sign in to comment.