In [2]:
import os
import json
import numpy as np
import pandas as pd
import pprint
from tqdm import tqdm

In [25]:
# Collect the names of the entries under data_path whose name contains
# ".json"; later cells open each of them with json.load.
data_path = "F:\\WuYihong\\Data\\nba_movement_data\\data"
file_list = [entry for entry in os.listdir(data_path) if ".json" in entry]

In [30]:
# generate team info
# Builds two lookup tables from the first event of each game file:
#   team_to_id: team name -> team id, and abbreviation -> team id
#   id_to_team: team id -> (team name, abbreviation)
team_to_id = {}
id_to_team = {}
for filename in tqdm(file_list):
    with open(os.path.join(data_path, filename), "r") as f:
        data = json.load(f)

    # Record BOTH sides of the game. Previously the home team's fields were
    # read and then overwritten by the visitor's before being stored, so a
    # team was only registered once it showed up as a visitor.
    first_event = data['events'][0]
    for side in ('home', 'visitor'):
        team = first_event[side]
        abbreviation = team['abbreviation']
        team_name = team['name']
        team_id = team['teamid']

        team_to_id[team_name] = team_id
        team_to_id[abbreviation] = team_id
        id_to_team[team_id] = (team_name, abbreviation)

    # Each file is slow to load, so stop as soon as the tables are complete:
    # 30 ids, each contributing a name key and an abbreviation key (60 keys).
    if len(team_to_id) == 60 and len(id_to_team) == 30:
        break

# Sort by key so the written files have a stable, readable order.
team_to_id = dict(sorted(team_to_id.items(), key=lambda x: x[0]))
id_to_team = dict(sorted(id_to_team.items(), key=lambda x: x[0]))

print("Team length:", len(team_to_id))
print("ID length:", len(id_to_team))

with open(".\\team_info\\team_to_id.json", "w") as f:
    json.dump(team_to_id, f)

# Note: json.dump writes the team-id keys as JSON strings and the
# (name, abbreviation) tuples as JSON arrays.
with open(".\\team_info\\id_to_team.json", "w") as f:
    json.dump(id_to_team, f)

  9%|▊         | 55/632 [05:29<57:36,  5.99s/it]Team length: 60
ID length: 30



In [10]:
# generate player info
# Builds two lookup tables from one metadata file per game directory:
#   player_id_to_info: player id -> first/last name plus parallel lists of
#                      jersey, team abbreviation and team id (one entry per
#                      distinct team the player was seen on)
#   player_name_to_id: "First Last" -> player id
meta_data_path = "E:\\WuYihong\\2021VIS\\data"
meta_file_list = os.listdir(meta_data_path)

player_id_to_info = {}
player_name_to_id = {}

for meta_file in tqdm(meta_file_list):
    cur_game_path = os.path.join(meta_data_path, meta_file)
    # NOTE(review): the event is picked with an unseeded np.random.choice, so
    # a different event directory may be sampled on every run.
    random_event = np.random.choice(os.listdir(cur_game_path))
    cur_event_path = os.path.join(cur_game_path, random_event)
    # First entry in the event directory whose name contains "metadata"
    # (raises IndexError if there is none).
    meta_data_file = [name for name in os.listdir(cur_event_path) if "metadata" in name][0]

    with open(os.path.join(cur_event_path, meta_data_file), "r") as f:
        data = json.load(f)

    # Home and visitor rosters are handled identically, so process both
    # sides with a single loop instead of two copy-pasted ones.
    for side in ('home', 'visitor'):
        team = data[side]['abbreviation']
        team_id = data[side]['teamid']

        for player in data[side]['players']:
            player_id = player['playerid']
            first_name = player['firstname']
            last_name = player['lastname']
            jersey = player['jersey']

            player_name = " ".join([first_name, last_name])
            if player_name not in player_name_to_id:
                player_name_to_id[player_name] = player_id
            elif player_id != player_name_to_id[player_name]:
                # Same full name seen with a different id: report only, the
                # first id recorded for that name is kept.
                print("Wrong Player info", player_name, player_id, player_name_to_id[player_name])

            if player_id not in player_id_to_info:
                player_id_to_info[player_id] = {'first_name': first_name,
                                                'last_name': last_name,
                                                'jersey': [jersey],
                                                'team': [team],
                                                'team_id': [team_id]
                                                }
                continue

            known = player_id_to_info[player_id]
            if team in known['team']:
                continue

            # Player already known but on a different team: report it and
            # append the new jersey/team/team_id to the parallel lists.
            print("Team Wrong", first_name, last_name, known['team'],
                  "to", team)
            known['jersey'].append(jersey)
            known['team'].append(team)
            known['team_id'].append(team_id)

# Order players by the abbreviation of their first recorded team, and names
# case-insensitively, so the written files are easy to browse.
player_id_to_info = dict(sorted(player_id_to_info.items(), key=lambda x: x[1]['team'][0]))
player_name_to_id = dict(sorted(player_name_to_id.items(), key=lambda x: x[0].upper()))

with open(".\\player_info\\player_id_to_info.json", "w") as f:
    json.dump(player_id_to_info, f, indent=4)

with open(".\\player_info\\player_name_to_id.json", "w") as f:
    json.dump(player_name_to_id, f, indent=4)

 20%|█▉        | 125/632 [00:01<00:06, 73.84it/s]Team Wrong Jarnell Stokes ['MEM'] to MIA
Team Wrong Beno Udrih ['MEM'] to MIA
Team Wrong Mario Chalmers ['MIA'] to MEM
Team Wrong James Ennis ['MIA'] to MEM
 70%|███████   | 444/632 [00:22<00:14, 13.07it/s]Team Wrong Ish Smith ['NOP'] to PHI
 73%|███████▎  | 464/632 [00:24<00:11, 14.51it/s]Team Wrong Ryan Hollins ['WAS'] to MEM
 98%|█████████▊| 622/632 [00:34<00:00, 17.48it/s]Team Wrong Josh Smith ['LAC'] to HOU
100%|██████████| 632/632 [00:35<00:00, 18.00it/s]


In [12]:
import time

# Sanity-check the saved player tables: reload each file, report how many
# entries it holds, list the players whose first recorded team is "LAL",
# and time the load + scan of the first file.
start_time = time.time()
with open(".\\player_info\\player_id_to_info.json", "r") as f:
    data = json.load(f)
print("player_id_to_info", len(data))
for player_id, player in data.items():
    if player['team'][0] != "LAL":
        continue
    print(player['first_name'], player['last_name'], player['jersey'][0])
print("Time Used:", time.time() - start_time)

with open(".\\player_info\\player_name_to_id.json", "r") as f:
    data = json.load(f)
print("player_name_to_id", len(data))

player_id_to_info 450
Nick Young 0
D'Angelo Russell 1
Brandon Bass 2
Anthony Brown 3
Ryan Kelly 4
Jordan Clarkson 6
Marcelo Huertas 9
Roy Hibbert 17
Lou Williams 23
Kobe Bryant 24
Tarik Black 28
Julius Randle 30
Robert Sacre 50
Metta World Peace 37
Larry Nance Jr. 7
Time Used: 0.003003358840942383
player_name_to_id 450
