In [1]:
import numpy as np
import pandas as pd
import json
import ujson
import os
import time
from tqdm.auto import tqdm

In [3]:
def convert_time(time_string):
    """Turn a ``"minutes:seconds"`` clock string into a total number of seconds.

    Only the first two colon-separated fields are read; any further fields
    are ignored.
    """
    fields = time_string.split(":")
    minutes = int(fields[0])
    seconds = int(fields[1])
    return 60 * minutes + seconds

In [4]:
def convert_points(point_string):
    """Parse a ``"<first> - <second>"`` score string into a pair of ints.

    The string is split on ``"-"``; the first field is returned as the
    visitor's points and the second as the home team's points. Surrounding
    whitespace in each field is stripped before conversion.
    """
    fields = point_string.split("-")
    visitor_point = int(fields[0].strip())
    home_point = int(fields[1].strip())
    return (visitor_point, home_point)

In [5]:
# Directories holding the per-game play-by-play CSVs and the movement JSONs.
# NOTE(review): these are absolute, machine-specific Windows paths.
event_file_path = "C:\\WuYihong\\Data\\nba_movement_data\\data\\events"
movement_file_path = "C:\\WuYihong\\Data\\nba_movement_data\\data"

# Keep only the entries with the expected extension, in os.listdir order.
event_file_list = [name for name in os.listdir(event_file_path) if name.endswith(".csv")]
movement_file_list = [name for name in os.listdir(movement_file_path) if name.endswith(".json")]

# Contents of player_id_to_info.json, read relative to the working directory.
player_info = {}
with open(".\\player_info\\player_id_to_info.json", "r") as f:
    player_info = ujson.load(f)

In [29]:
# test_dict = {}

# Split every game's play-by-play CSV into time segments and write one JSON
# file per game.
#
# NOTE: this cell calls process_descriptions(), which is defined in a cell
# that appears *after* this one in the notebook. That cell must be executed
# first, otherwise this loop raises NameError on a fresh kernel.

# A segment is kept only if its duration (in seconds, as produced by
# convert_time) lies within these bounds, inclusive.
MIN_SEGMENT_SECONDS = 3
MAX_SEGMENT_SECONDS = 30

# Build the output directory with os.path.join instead of hard-coded
# backslashes, and create it up front so the first open() cannot fail with
# FileNotFoundError on a fresh checkout.
game_splits_dir = os.path.join("game_info", "game_splits")
os.makedirs(game_splits_dir, exist_ok=True)


def _is_usable_segment(start_time, end_time, offensive_team, terminal_player, result):
    """Return True when a segment has an accepted duration and a full label."""
    duration = abs(start_time - end_time)
    return (
        MIN_SEGMENT_SECONDS <= duration <= MAX_SEGMENT_SECONDS
        and offensive_team != ""
        and terminal_player != ""
        and result != ""
    )


for event in event_file_list:
    event_list = []
    event_file = os.path.join(event_file_path, event)
    game_id = event.split(".")[0]
    print("Processing {}...".format(game_id))

    # Per-game state.
    event_df = pd.read_csv(event_file)
    last_row = len(event_df) - 1
    event_info = []                        # rows collected for the current segment
    visitor_score, home_score = 0, 0       # score at the start of the current segment
    period, start_time, end_time = 1, 720, -1
    cur_scores = [0, 0]                    # running [visitor, home] score

    for event_id in range(len(event_df)):
        event_item = event_df.loc[event_id]
        period = int(event_item.PERIOD)
        end_time = convert_time(event_item.PCTIMESTRING)
        # Clock of the following row; the last row is compared with itself,
        # so the final segment is flushed after the loop instead.
        next_time = convert_time(event_df.loc[min(event_id + 1, last_row)].PCTIMESTRING)

        # Remember the score as it stood when this segment began.
        if len(event_info) < 1:
            visitor_score, home_score = cur_scores

        # Update the running score. Missing cells are read by pandas as
        # float NaN, so a float here means "no score on this row".
        if not isinstance(event_item.SCORE, float):
            # Parse the score string once rather than once per element.
            cur_scores[0], cur_scores[1] = convert_points(event_item.SCORE)

        # Lower-case the descriptions; missing ones (float NaN) become "".
        home_desc = event_item.HOMEDESCRIPTION
        home_desc = "" if isinstance(home_desc, float) else home_desc.lower()
        visitor_desc = event_item.VISITORDESCRIPTION
        visitor_desc = "" if isinstance(visitor_desc, float) else visitor_desc.lower()

        event_info.append((home_desc, visitor_desc, str(event_item.PLAYER1_ID), str(event_item.PLAYER2_ID)))

        # The clock changes on the next row: close the current segment.
        if next_time != end_time:
            offensive_team, terminal_player, result = process_descriptions(event_info)
            if _is_usable_segment(start_time, end_time, offensive_team, terminal_player, result):
                index = len(event_list)
                event_list.append((period, start_time, end_time, visitor_score, home_score, offensive_team, terminal_player, result, index))
            event_info = []
            start_time = end_time

    # Flush whatever rows remain after the last row of the game.
    offensive_team, terminal_player, result = process_descriptions(event_info)
    if _is_usable_segment(start_time, end_time, offensive_team, terminal_player, result):
        index = len(event_list)
        event_list.append((period, start_time, end_time, visitor_score, home_score, offensive_team, terminal_player, result, index))

    with open(os.path.join(game_splits_dir, "{}.json".format(game_id)), "w") as f:
        json.dump(event_list, f, indent=4)
    # break

# with open("tmp.json", "w") as f:
#     json.dump(test_dict, f, indent=4)
# test_dict

Processing 0021500001...
Processing 0021500002...
Processing 0021500003...
Judge Team Wrong! home turnover | thompson foul turnover (p2.t7)
Judge Team Wrong! visitor shot | miss cunningham 57' 3pt jump shot
Processing 0021500004...
Processing 0021500005...
Processing 0021500007...
Judge Player Wrong! home s.foul | tolliver s.foul (p1.pn) (s.wall)
Processing 0021500009...
Processing 0021500010...
Processing 0021500011...
Processing 0021500012...
Processing 0021500013...
Processing 0021500015...
Judge Player Wrong! visitor s.foul | smith s.foul (p1.pn) (s.anderson)
Processing 0021500016...
Processing 0021500017...
Judge Player Wrong! visitor s.foul | garnett s.foul (p1.t4) (g.zielinski)
Processing 0021500018...
Processing 0021500019...
Processing 0021500020...
Processing 0021500021...
Judge Team Wrong! visitor turnover | wade step out of bounds turnover (p1.t1)
Processing 0021500022...
Processing 0021500023...
Processing 0021500024...
Processing 0021500025...
Judge Player Wrong! home s.f

In [28]:
def process_descriptions(event_info):
    """Condense a group of play-by-play rows into a single outcome record.

    Parameters
    ----------
    event_info : iterable of 4-tuples
        Each item is ``(home_desc, visitor_desc, player_1_id, player_2_id)``.
        The descriptions are searched for lower-case keywords; an absent
        description is expected to be the empty string.

    Returns
    -------
    tuple
        ``(offensive_team, terminal_player, result)``. ``offensive_team`` is
        ``"home"``, ``"visitor"`` or ``""``. ``terminal_player`` is one of the
        supplied player ids or ``""``. ``result`` is a concatenation of
        ``&``-terminated tags (``"2pt shot&"``, ``"3pt shot&"``, ``"miss&"``,
        ``"made&"``, ``"turnover&"``, ``"s.foul&"`` and a trailing
        ``"free throw M of N&"`` summary), or ``""`` if nothing matched.

    Rows that contradict what was already inferred are reported with
    ``print``. Checks run in a fixed order per row (home shot, visitor shot,
    home turnover, visitor turnover, home s.foul, visitor s.foul, home free
    throw, visitor free throw); a ``continue`` abandons the remaining checks
    for that row, and a recorded turnover stops processing altogether.
    """
    shot_words = ("shot", "layup", "fadeaway", "dunk")

    offensive_team = ""
    terminal_player = ""
    result = ""
    ft_attempts = 0
    ft_made = 0

    for home_desc, visitor_desc, player_1_id, player_2_id in event_info:

        # ---- field-goal attempt described on the home side ----
        if any(word in home_desc for word in shot_words):
            # Anything mentioning "clock" is not treated as a shot; the
            # whole row is skipped.
            if "clock" in home_desc:
                continue

            if offensive_team == "visitor":
                print("Judge Team Wrong! home shot |", home_desc)
            offensive_team = "home"

            # A different player already owns this group: ignore the row.
            # (offensive_team was just set above, so no diagnostic is ever
            # printed for this case.)
            if terminal_player != "" and terminal_player != player_1_id:
                continue
            terminal_player = player_1_id

            result += "3pt shot&" if "3pt" in home_desc else "2pt shot&"
            result += "miss&" if "miss" in home_desc else "made&"

        # ---- field-goal attempt described on the visitor side ----
        if any(word in visitor_desc for word in shot_words):
            if "clock" in visitor_desc:
                continue

            if offensive_team == "home":
                print("Judge Team Wrong! visitor shot |", visitor_desc)
            offensive_team = "visitor"

            if terminal_player != "" and terminal_player != player_1_id:
                continue
            terminal_player = player_1_id

            result += "3pt shot&" if "3pt" in visitor_desc else "2pt shot&"
            result += "miss&" if "miss" in visitor_desc else "made&"

        # ---- turnovers: the first accepted one ends the scan ----
        if "turnover" in home_desc and "turnover" not in result:
            if offensive_team == "visitor":
                print("Judge Team Wrong! home turnover |", home_desc)
                continue
            offensive_team = "home"
            terminal_player = player_1_id
            result += "turnover&"
            break

        if "turnover" in visitor_desc and "turnover" not in result:
            if offensive_team == "home":
                print("Judge Team Wrong! visitor turnover |", visitor_desc)
                continue
            offensive_team = "visitor"
            terminal_player = player_1_id
            result += "turnover&"
            break

        # ---- "s.foul" on the home side: the visitor is on offense ----
        if "s.foul" in home_desc:
            if offensive_team == "home":
                print("Judge Team Wrong! home s.foul |", home_desc)

            if terminal_player != "" and terminal_player != player_2_id:
                print("Judge Player Wrong! home s.foul |", home_desc)
                continue
            # Either nothing was recorded yet or it already equals player_2_id.
            terminal_player = player_2_id

            offensive_team = "visitor"
            result += "s.foul&"

        # ---- "s.foul" on the visitor side: home is on offense ----
        if "s.foul" in visitor_desc:
            if offensive_team == "visitor":
                print("Judge Team Wrong! visitor s.foul |", visitor_desc)

            if terminal_player != "" and terminal_player != player_2_id:
                print("Judge Player Wrong! visitor s.foul |", visitor_desc)
                continue
            terminal_player = player_2_id

            offensive_team = "home"
            result += "s.foul&"

        # ---- free throws (descriptions containing "technical" excluded) ----
        if "free throw" in home_desc and 'technical' not in home_desc:
            # Free throws by the team not on offense are not counted.
            if offensive_team not in ("", "home"):
                continue

            if offensive_team == "":
                offensive_team = "home"
                terminal_player = player_1_id

            ft_attempts += 1
            ft_made += int("miss" not in home_desc)

        if "free throw" in visitor_desc and 'technical' not in visitor_desc:
            if offensive_team not in ("", "visitor"):
                continue

            if offensive_team == "":
                offensive_team = "visitor"
                terminal_player = player_1_id

            ft_attempts += 1
            ft_made += int("miss" not in visitor_desc)

    # Summarise the free throws once, after every row has been seen.
    if ft_attempts > 0:
        result += "free throw {} of {}&".format(ft_made, ft_attempts)

    return offensive_team, terminal_player, result