Skip to content

Commit

Permalink
moved player names fixes and team tri codes dictionaries to their own…
Browse files Browse the repository at this point in the history
… json files. Also added a few names to the player names fixes json for the early 2020 season
  • Loading branch information
HarryShomer committed Jan 20, 2021
1 parent a18cc8f commit eff06f3
Show file tree
Hide file tree
Showing 16 changed files with 362 additions and 129 deletions.
Binary file removed .DS_Store
Binary file not shown.
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Hockey-Scraper
.. inclusion-marker-for-sphinx
**Notes:**
**Note:**
* Coordinates are only scraped from ESPN for versions 1.33+
* NWHL usage has been deprecated due to the removal of the pbp information for each game.

Expand Down
Binary file modified docs/build/doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/build/doctrees/index.doctree
Binary file not shown.
Binary file modified docs/build/doctrees/nhl_scrape_functions.doctree
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/build/html/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ <h2>Contents<a class="headerlink" href="#contents" title="Permalink to this head
</ul>
</div>
<dl class="simple">
<dt><strong>Notes:</strong></dt><dd><ul class="simple">
<dt><strong>Note:</strong></dt><dd><ul class="simple">
<li><p>Coordinates are only scraped from ESPN for versions 1.33+</p></li>
<li><p>NWHL usage has been deprecated due to the removal of the pbp information for each game.</p></li>
</ul>
Expand Down
8 changes: 7 additions & 1 deletion docs/build/html/nhl_scrape_functions.html
Original file line number Diff line number Diff line change
Expand Up @@ -1938,7 +1938,13 @@ <h2>Scraping<a class="headerlink" href="#scraping" title="Permalink to this head
<dt id="hockey_scraper.utils.shared.season_start_bound">
<code class="sig-prename descclassname">hockey_scraper.utils.shared.</code><code class="sig-name descname">season_start_bound</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">year</span></em><span class="sig-paren">)</span><a class="headerlink" href="#hockey_scraper.utils.shared.season_start_bound" title="Permalink to this definition"></a></dt>
<dd><p>Get start bound for a season.</p>
<p>There is a bug in the schedule API for 2016 that causes the pushback to 09-30</p>
<dl class="simple">
<dt>Notes:</dt><dd><ul class="simple">
<li><p>There is a bug in the schedule API for 2016, so the start bound for that season is pushed back to 09-30</p></li>
<li><p>Pandemic season started in January</p></li>
</ul>
</dd>
</dl>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>year</strong> – str of year for given date</p>
Expand Down
2 changes: 1 addition & 1 deletion docs/build/html/searchindex.js

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions hockey_scraper/nhl/game_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ def get_players_json(game_json):

for venue in players:
team_players = game_json['liveData']['boxscore']['teams'][venue]['players']
team_name = shared.get_team(game_json['liveData']['boxscore']['teams'][venue]['team']['name'].upper())
team_name = shared.get_team(game_json['liveData']['boxscore']['teams'][venue]['team']['name'])

for id_key in team_players:
player_name = shared.fix_name(team_players[id_key]['person']['fullName'].upper())
player_name = shared.fix_name(team_players[id_key]['person']['fullName'])

players[venue][player_name] = {
"id": team_players[id_key]['person']['id'],
Expand Down
6 changes: 3 additions & 3 deletions hockey_scraper/nhl/json_schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,12 @@ def scrape_schedule(date_from, date_to, preseason=False, not_over=False):

if (game_id >= 20000 or preseason) and game_id < 40000:
schedule.append({
"game_id": game['gamePk'],
"game_id": game['gamePk'],
"date": day['date'],
"start_time": datetime.strptime(game['gameDate'][:-1], "%Y-%m-%dT%H:%M:%S"),
"venue": game['venue'].get('name'),
"home_team": shared.get_team(game['teams']['home']['team']['name'].upper()),
"away_team": shared.get_team(game['teams']['away']['team']['name'].upper()),
"home_team": shared.get_team(game['teams']['home']['team']['name']),
"away_team": shared.get_team(game['teams']['away']['team']['name']),
"home_score": game['teams']['home'].get("score"),
"away_score": game['teams']['away'].get("score"),
"status": game["status"]["abstractGameState"]
Expand Down
2 changes: 1 addition & 1 deletion hockey_scraper/nhl/pbp/html_pbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ def populate_players(event_dict, players, away_players, home_players):
# Deal with the Home & Away Player Fields
try:
ven_player = home_players[j] if venue == "Home" else away_players[j]
name = shared.fix_name(ven_player[0].upper())
name = shared.fix_name(ven_player[0])
event_dict['{}Player{}'.format(venue.lower(), j + 1)] = name
event_dict['{}Player{}_id'.format(venue.lower(), j + 1)] = players[venue][name]['id']
except KeyError:
Expand Down
6 changes: 3 additions & 3 deletions hockey_scraper/nhl/pbp/json_pbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def get_teams(pbp_json):
:return: dict with home and away
"""
return {
'Home': shared.get_team(pbp_json['gameData']['teams']['home']['name'].upper()),
'Away': shared.get_team(pbp_json['gameData']['teams']['away']['name'].upper())
'Home': shared.get_team(pbp_json['gameData']['teams']['home']['name']),
'Away': shared.get_team(pbp_json['gameData']['teams']['away']['name'])
}


Expand Down Expand Up @@ -100,7 +100,7 @@ def parse_event(event):

for i in range(len(event['players'])):
if event['players'][i]['playerType'] != 'Goalie':
play['p{}_name'.format(i + 1)] = shared.fix_name(event['players'][i]['player']['fullName'].upper())
play['p{}_name'.format(i + 1)] = shared.fix_name(event['players'][i]['player']['fullName'])
play['p{}_ID'.format(i + 1)] = event['players'][i]['player']['id']

play['xC'] = event['coordinates'].get('x')
Expand Down
2 changes: 1 addition & 1 deletion hockey_scraper/nhl/shifts/json_shifts.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def parse_shift(shift):
if shift['eventDescription'] is not None:
return {}

name = shared.fix_name(' '.join([shift['firstName'].strip(' ').upper(), shift['lastName'].strip(' ').upper()]))
name = shared.fix_name(' '.join([shift['firstName'].strip(' '), shift['lastName'].strip(' ')]))

shift_dict['Player'] = name
shift_dict['Player_Id'] = shift['playerId']
Expand Down
279 changes: 279 additions & 0 deletions hockey_scraper/utils/player_name_fixes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
{
"_description": "Fixes some of the mistakes made with player names (converts to 'correct' name)",
"_comment": "A majority of this is courtesy of Muneeb Alam (https://github.com/muneebalam/Hockey/blob/master/NHL/Core/GetPbP.py)",

"fixes": {
"n/a": "n/a",
"ALEXANDER OVECHKIN": "Alex Ovechkin",
"TOBY ENSTROM": "Tobias Enstrom",
"JAMIE MCGINN": "Jamie McGinn",
"CODY MCLEOD": "Cody McLeod",
"MARC-EDOUARD VLASIC": "Marc-Edouard Vlasic",
"RYAN MCDONAGH": "Ryan McDonagh",
"CHRIS TANEV": "Christopher Tanev",
"JARED MCCANN": "Jared McCann",
"P.K. SUBBAN": "PK Subban",
"DEVANTE SMITH-PELLY": "Devante Smith-Pelly",
"MIKE MCKENNA": "Mike McKenna",
"MICHAEL MCCARRON": "Michael McCarron",
"T.J. BRENNAN": "TJ Brennan",
"BRAYDEN MCNABB": "Brayden McNabb",
"PIERRE-ALEXANDRE PARENTEAU": "PA Parenteau",
"JAMES VAN RIEMSDYK": "James van Riemsdyk",
"OLIVER EKMAN-LARSSON": "Oliver Ekman-Larsson",
"TJ OSHIE": "TJ Oshie",
"J P DUMONT": "JP Dumont",
"J.T. MILLER": "JT Miller",
"R.J UMBERGER": "RJ Umberger",
"PA PARENTEAU": "PA Parenteau",
"PER-JOHAN AXELSSON": "P.J. Axelsson",
"MAXIME TALBOT": "Max Talbot",
"JOHN-MICHAEL LILES": "John-Michael Liles",
"DANIEL GIRARDI": "Dan Girardi",
"DANIEL CLEARY": "Dan Cleary",
"NIKLAS KRONVALL": "Niklas Kronwall",
"SIARHEI KASTSITSYN": "Sergei Kostitsyn",
"ANDREI KASTSITSYN": "Andrei Kostitsyn",
"ALEXEI KOVALEV": "Alex Kovalev",
"DAVID JOHNNY ODUYA": "Johnny Oduya",
"EDWARD PURCELL": "Teddy Purcell",
"NICKLAS GROSSMAN": "Nicklas Grossmann",
"PERNELL KARL SUBBAN": "PK Subban",
"VOJTEK VOLSKI": "Wojtek Wolski",
"VYACHESLAV VOYNOV": "Slava Voynov",
"FREDDY MODIN": "Fredrik Modin",
"VACLAV PROSPAL": "Vinny Prospal",
"KRISTOPHER LETANG": "Kris Letang",
"PIERRE ALEXANDRE PARENTEAU": "PA Parenteau",
"T.J. OSHIE": "TJ Oshie",
"JOHN HILLEN III": "Jack Hillen",
"BRANDON CROMBEEN": "B.J. Crombeen",
"JEAN-PIERRE DUMONT": "JP Dumont",
"RYAN NUGENT-HOPKINS": "Ryan Nugent-Hopkins",
"CONNOR MCDAVID": "Connor McDavid",
"TREVOR VAN RIEMSDYK": "Trevor van Riemsdyk",
"CALVIN DE HAAN": "Calvin de Haan",
"GREG MCKEGG": "Greg McKegg",
"NATHAN MACKINNON": "Nathan MacKinnon",
"KYLE MCLAREN": "Kyle McLaren",
"ADAM MCQUAID": "Adam McQuaid",
"DYLAN MCILRATH": "Dylan McIlrath",
"DANNY DEKEYSER": "Danny DeKeyser",
"JAKE MCCABE": "Jake McCabe",
"JAMIE MCBAIN": "Jamie McBain",
"PIERRE-MARC BOUCHARD": "Pierre-Marc Bouchard",
"JEAN-FRANCOIS JACQUES": "JF Jacques",
"OLE-KRISTIAN TOLLEFSEN": "Ole-Kristian Tollefsen",
"MARC-ANDRE BERGERON": "Marc-Andre Bergeron",
"MARC-ANTOINE POULIOT": "Marc-Antoine Pouliot",
"MARC-ANDRE GRAGNANI": "Marc-Andre Gragnani",
"JORDAN LAVALLEE-SMOTHERMAN": "Jordan Lavallee-Smotherman",
"PIERRE-LUC LETOURNEAU-LEBLOND": "Pierre Leblond",
"J-F JACQUES": "JF Jacques",
"JP DUMONT": "JP Dumont",
"MARC-ANDRE CLICHE": "Marc-Andre Cliche",
"J-P DUMONT": "JP Dumont",
"JOSHUA BAILEY": "Josh Bailey",
"OLIVIER MAGNAN-GRENIER": "Olivier Magnan-Grenier",
"FRÉDÉRIC ST-DENIS": "Frederic St-Denis",
"MARC-ANDRE BOURDON": "Marc-Andre Bourdon",
"PIERRE-CEDRIC LABRIE": "Pierre-Cedric Labrie",
"JONATHAN AUDY-MARCHESSAULT": "Jonathan Marchessault",
"JEAN-GABRIEL PAGEAU": "Jean-Gabriel Pageau",
"JEAN-PHILIPPE COTE": "Jean-Philippe Cote",
"PIERRE-EDOUARD BELLEMARE": "Pierre-Edouard Bellemare",
"COLIN (JOHN) WHITE": "Colin White",
"BATES (JON) BATTAGLIA": "Bates Battaglia",
"MATHEW DUBMA": "Matt Dumba",
"NIKOLAI ANTROPOV": "Nik Antropov",
"KRYS BARCH": "Krystofer Barch",
"CAMERON BARKER": "Cam Barker",
"NICKLAS BERGFORS": "Niclas Bergfors",
"ROBERT BLAKE": "Rob Blake",
"MICHAEL BLUNDEN": "Mike Blunden",
"CHRISTOPHER BOURQUE": "Chris Bourque",
"MICHëL BOURNIVAL": "Michael Bournival",
"NICHOLAS BOYNTON": "Nick Boynton",
"TJ BRENNAN": "TJ Brennan",
"DANIEL BRIERE": "Danny Briere",
"TJ BRODIE": "TJ Brodie",
"J.T. BROWN": "JT Brown",
"ALEXANDRE BURROWS": "Alex Burrows",
"MICHAEL CAMMALLERI": "Mike Cammalleri",
"DANIEL CARCILLO": "Dan Carcillo",
"MATTHEW CARLE": "Matt Carle",
"DANNY CLEARY": "Dan Cleary",
"JOSEPH CORVO": "Joe Corvo",
"JOSEPH CRABB": "Joey Crabb",
"BJ CROMBEEN": "B.J. Crombeen",
"EVGENII DADONOV": "Evgeny Dadonov",
"CHRIS VANDE VELDE": "Chris VandeVelde",
"JACOB DE LA ROSE": "Jacob de la Rose",
"JOE DIPENTA": "Joe DiPenta",
"JON DISALVATORE": "Jon DiSalvatore",
"JACOB DOWELL": "Jake Dowell",
"NICHOLAS DRAZENOVIC": "Nick Drazenovic",
"ROBERT EARL": "Robbie Earl",
"ALEXANDER FROLOV": "Alex Frolov",
"T.J. GALIARDI": "TJ Galiardi",
"TJ GALIARDI": "TJ Galiardi",
"ANDREW GREENE": "Andy Greene",
"MICHAEL GRIER": "Mike Grier",
"NATHAN GUENIN": "Nate Guenin",
"MARTY HAVLAT": "Martin Havlat",
"JOSHUA HENNESSY": "Josh Hennessy",
"T.J. HENSICK": "TJ Hensick",
"TJ Hensick": "TJ Hensick",
"CHRISTOPHER HIGGINS": "Chris Higgins",
"ROBERT HOLIK": "Bobby Holik",
"MATTHEW IRWIN": "Matt Irwin",
"P. J. AXELSSON": "P.J. Axelsson",
"PER JOHAN AXELSSON": "P.J. Axelsson",
"JONATHON KALINSKI": "Jon Kalinski",
"ALEXANDER KHOKHLACHEV": "Alex Khokhlachev",
"DJ KING": "DJ King",
"DWAYNE KING": "DJ King",
"MICHAEL KNUBLE": "Mike Knuble",
"KRYSTOFER KOLANOS": "Krys Kolanos",
"MICHAEL KOMISAREK": "Mike Komisarek",
"STAFFAN KRONVALL": "Staffan Kronwall",
"NIKOLAY KULEMIN": "Nikolai Kulemin",
"CLARKE MACARTHUR": "Clarke MacArthur",
"LANE MACDERMID": "Lane MacDermid",
"ANDREW MACDONALD": "Andrew MacDonald",
"RAYMOND MACIAS": "Ray Macias",
"CRAIG MACDONALD": "Craig MacDonald",
"STEVE MACINTYRE": "Steve MacIntyre",
"MAKSIM MAYOROV": "Maxim Mayorov",
"AARON MACKENZIE": "Aaron MacKenzie",
"DEREK MACKENZIE": "Derek MacKenzie",
"RODNEY PELLEY": "Rod Pelley",
"BRETT MACLEAN": "Brett MacLean",
"ANDREW MACWILLIAM": "Andrew MacWilliam",
"BRYAN MCCABE": "Bryan McCabe",
"OLIVIER MAGNAN": "Olivier Magnan-Grenier",
"DEAN MCAMMOND": "Dean McAmmond",
"KENNDAL MCARDLE": "Kenndal McArdle",
"ANDY MCDONALD": "Andy McDonald",
"COLIN MCDONALD": "Colin McDonald",
"JOHN MCCARTHY": "John McCarthy",
"STEVE MCCARTHY": "Steve McCarthy",
"DARREN MCCARTY": "Darren McCarty",
"JAY MCCLEMENT": "Jay McClement",
"CODY MCCORMICK": "Cody McCormick",
"MAX MCCORMICK": "Max McCormick",
"BROCK MCGINN": "Brock McGinn",
"TYE MCGINN": "Tye McGinn",
"BRIAN MCGRATTAN": "Brian McGrattan",
"DAVID MCINTYRE": "David McIntyre",
"NATHAN MCIVER": "Nathan McIver",
"JAY MCKEE": "Jay McKee",
"CURTIS MCKENZIE": "Curtis McKenzie",
"FRAZER MCLAREN": "Frazer McLaren",
"BRETT MCLEAN": "Brett McLean",
"BRANDON MCMILLAN": "Brandon McMillan",
"CARSON MCMILLAN": "Carson McMillan",
"PHILIP MCRAE": "Philip McRae",
"FREDERICK MEYER IV": "Freddy Meyer",
"MICHAEL MODANO": "Mike Modano",
"CHRISTOPHER NEIL": "Chris Neil",
"MATTHEW NIETO": "Matt Nieto",
"JOHN ODUYA": "Johnny Oduya",
"PIERRE PARENTEAU": "PA Parenteau",
"MARC POULIOT": "Marc-Antoine Pouliot",
"MAXWELL REINHART": "Max Reinhart",
"MICHAEL RUPP": "Mike Rupp",
"ROBERT SCUDERI": "Rob Scuderi",
"TOMMY SESTITO": "Tom Sestito",
"MICHAEL SILLINGER": "Mike Sillinger",
"JONATHAN SIM": "Jon Sim",
"MARTIN ST LOUIS": "Martin St. Louis",
"MATTHEW STAJAN": "Matt Stajan",
"ZACHERY STORTINI": "Zack Stortini",
"PK SUBBAN": "PK Subban",
"WILLIAM THOMAS": "Bill Thomas",
"R.J. UMBERGER": "RJ Umberger",
"RJ UMBERGER": "RJ Umberger",
"MARK VAN GUILDER": "Mark van Guilder",
"BRYCE VAN BRABANT": "Bryce van Brabant",
"DAVID VAN DER GULIK": "David van der Gulik",
"MIKE VAN RYN": "Mike van Ryn",
"ANDREW WOZNIEWSKI": "Andy Wozniewski",
"JAMES WYMAN": "JT Wyman",
"JT WYMAN": "JT Wyman",
"NIKOLAY ZHERDEV": "Nikolai Zherdev",
"HARRISON ZOLNIERCZYK": "Harry Zolnierczyk",
"MARTIN ST PIERRE": "Martin St. Pierre",
"B.J CROMBEEN": "B.J. Crombeen",
"DENIS GAUTHIER JR.": "DENIS GAUTHIER",
"DENIS JR. GAUTHIER": "DENIS GAUTHIER",
"MARC-ANDRE FLEURY": "Marc-Andre Fleury",
"DAN LACOUTURE": "Dan LaCouture",
"RICK DIPIETRO": "Rick DiPietro",
"JOEY MACDONALD": "Joey MacDonald",
"TIMOTHY JR. THOMAS": "Tim Thomas",
"ILJA BRYZGALOV": "Ilya Bryzgalov",
"MATHEW DUMBA": "Matt Dumba",
"MICHAËL BOURNIVAL": "Michael Bournival",
"MATTHEW BENNING": "Matt Benning",
"ZACHARY SANFORD": "Zach Sanford",
"AJ GREER": "A.J. Greer",
"JT COMPHER": "J.T. Compher",
"NICOLAS PETAN": "Nic Petan",
"VINCENT HINOSTROZA": "Vinnie Hinostroza",
"PHILIP VARONE": "Phil Varone",
"JOSHUA MORRISSEY": "Josh Morrissey",
"Mathew Bodie": "Mat Bodie",
"MICHAEL FERLAND": "Micheal Ferland",
"MICHAEL SANTORELLI": "Mike Santorelli",
"CHRISTOPHER BREEN": "Chris Breen",
"BRYCE VAN BRABRANT": "Bryce Van Brabant",
"ALEXANDER KILLORN": "Alex Killorn",
"JOSEPH MORROW": "Joe Morrow",
"ALEX STEEN": "Alexander Steen",
"BRADLEY MILLS": "Brad Mills",
"MICHAEL SISLO": "Mike Sislo",
"MICHAEL VERNACE": "Mike Vernace",
"STEVEN REINPRECHT": "Steve Reinprecht",
"MATTHEW MURRAY": "Matt Murray",
"THOMAS MCCOLLUM": "TOM MCCOLLUM",
"MICHAEL MATHESON": "MIKE MATHESON",
"BOO NIEVES": "CRISTOVAL NIEVES",
"J.F. BERUBE": "JEAN-FRANCOIS BERUBE",
"TONY DEANGELO": "ANTHONY DEANGELO",
"JEFFREY HAMILTON": "JEFF HAMILTON",
"JAMES VANDERMEER": "JIM VANDERMEER",
"MICHAEL YORK": "MIKE YORK",
"EMMANUEL LEGACE": "MANNY LEGACE",
"JAMES DOWD": "JIM DOWD",
"ANDREW MILLER": "DREW MILLER",
"JOHN PEVERLEY": "RICH PEVERLEY",
"ILJA ZUBOV": "ILYA ZUBOV",
"CHRISTOPHER MINARD": "CHRIS MINARD",
"BENJAMIN ONDRUS": "BEN ONDRUS",
"ZACH FITZGERALD": "ZACK FITZGERALD",
"STEPHEN VALIQUETTE": "STEVE VALIQUETTE",
"OLAF KOLZIG": "OLIE KOLZIG",
"J-SEBASTIEN AUBIN": "JEAN-SEBASTIEN AUBIN",
"ALEXANDER AULD": "ALEX AULD",
"JAMES HOWARD": "JIMMY HOWARD",
"JEFF DROUIN-DESLAURIERS": "JEFF DESLAURIERS",
"SIMEON VARLAMOV": "SEMYON VARLAMOV",
"ALEXANDER PECHURSKI": "Alexander Pechurskiy",
"JEFFREY PENNER": "JEFF PENNER",
"EMMANUEL FERNANDEZ": "Manny FERNANDEZ",
"ALEXANDER PETROVIC": "ALEX PETROVIC",
"ZACHARY ASTON-REESE": "ZACH ASTON-REESE",
"J-F BERUBE": "JEAN-FRANCOIS BERUBE",
"DANNY O'REGAN": "DANIEL O'REGAN",
"PATRICK MAROON": "PAT MAROON",
"LEE STEMPNIAK": "LEE STEMPNIAK",
"JAMES REIMER ,": "JAMES REIMER",
"CALVIN PETERSEN ,": "CALVIN PETERSEN",
"CAL PETERSEN": "CALVIN PETERSEN",
"ALEXANDER NYLANDER": "ALEX NYLANDER",
"CHRISTOPHER WAGNER": "CHRIS WAGNER",
"EGOR SHARANGOVICH": "Yegor Sharangovich",
"ALEXIS LAFRENI?RE": "Alexis Lafrenière"
}
}

0 comments on commit eff06f3

Please sign in to comment.