In [42]:
from nba_api.stats.endpoints import playbyplayv3

# Example game ID
game_id = '0021400890'  # Replace with the desired game ID
bnk_pbp = playbyplayv3.PlayByPlayV3(game_id=game_id).get_data_frames()[0]
bnk_pbp['ABS_TIME'] = bnk_pbp.apply(
    lambda row: (row['period'] - 1) * 12 * 60 + int(row['clock'][2:4]) * 60 + int(row['clock'][5:7]), axis=1
)



In [44]:
print(bnk_pbp.head())
print(bnk_pbp.columns)


       gameId  actionNumber        clock  period      teamId teamTricode  \
0  0021400890             0  PT12M00.00S       1           0               
1  0021400890             1  PT12M00.00S       1  1610612766         CHA   
2  0021400890             2  PT11M44.00S       1  1610612747         LAL   
3  0021400890             3  PT11M29.00S       1  1610612766         CHA   
4  0021400890             4  PT11M28.00S       1  1610612747         LAL   

   personId playerName   playerNameI  xLegacy  ...  scoreAway  pointsTotal  \
0         0                                 0  ...          0            0   
1    203469     Zeller     C. Zeller        0  ...                       0   
2    201961  Ellington  W. Ellington      234  ...          3            3   
3    201945  Henderson  G. Henderson      125  ...                       0   
4      2430     Boozer     C. Boozer        0  ...                       0   

  location                                        description   actionType

In [7]:
from nba_api.stats.endpoints import leaguegamefinder

# קבלת כל המשחקים בעונת 2014-15
game_finder = leaguegamefinder.LeagueGameFinder(season_nullable='2014-15')
games = game_finder.get_data_frames()[0]

# הדפסת Game IDs
print(games['GAME_ID'].unique())


['0041400406' '0041400405' '0041400404' ... '0011400003' '0011400002'
 '0011400001']


In [9]:
from nba_api.stats.endpoints import playbyplayv3
import pandas as pd

# פונקציה למעבר משעון זמן מוחלט (רבע ושעון) לזמן מוחלט בשניות
def clock_to_absolute_seconds(clock, period):
    if isinstance(clock, str) and clock.startswith('PT'):
        clock = clock.replace('PT', '').replace('S', '')
        minutes, seconds = map(float, clock.split('M'))
        period_offset = (period - 1) * 12 * 60  # Offset לכל רבע
        return int(period_offset + minutes * 60 + seconds)
    else:
        raise ValueError("Invalid clock format. Expected 'PTMMSS' format.")

# פונקציה לחישוב זמני משחק
def calculate_minutes_played(play_by_play_df, until_clock):
    play_by_play_df = play_by_play_df.sort_values(by=['period', 'ABS_TIME']).reset_index(drop=True)

    active_players = {}
    total_play_time = {}

    sub_events = play_by_play_df[
        (play_by_play_df.actionType == 'Substitution') & 
        (play_by_play_df.ABS_TIME <= until_clock)
    ]

    for _, row in sub_events.iterrows():
        current_time = row['ABS_TIME']
        description = row['description']

        if 'SUB:' in description and 'FOR' in description:
            try:
                sub_index = description.index('SUB:') + len('SUB:')
                for_index = description.index('FOR')
                sub_in = description[sub_index:for_index].strip()
                sub_out = description[for_index + len('FOR'):].strip()
                team = row['teamTricode']

                if team not in active_players:
                    active_players[team] = {}

                if sub_out in active_players[team]:
                    play_start_time = active_players[team].pop(sub_out)
                    if current_time >= play_start_time:
                        minutes_played = (current_time - play_start_time) / 60
                        total_play_time[sub_out] = total_play_time.get(sub_out, 0) + minutes_played

                active_players[team][sub_in] = current_time
            except ValueError:
                continue

    for team, players in active_players.items():
        for player, play_start_time in players.items():
            minutes_played = (until_clock - play_start_time) / 60
            total_play_time[player] = total_play_time.get(player, 0) + minutes_played

    result = pd.DataFrame(list(total_play_time.items()), columns=['Player', 'MinutesPlayed'])
    result = result.sort_values(by='MinutesPlayed', ascending=False).reset_index(drop=True)
    return result

# קבלת נתוני Play-by-Play ממשחק
example_game_id = '0021400890'  # מזהה משחק לדוגמה
bnk_pbp = playbyplayv3.PlayByPlayV3(game_id=example_game_id).get_data_frames()[0]

# הוספת עמודת ABS_TIME
bnk_pbp['ABS_TIME'] = bnk_pbp.apply(
    lambda row: clock_to_absolute_seconds(row['clock'], row['period']), axis=1
)

# זמן החיתוך לכל המשחק (48 דקות)
until_clock = 2880  # 48 דקות = 2880 שניות
# או הוספת הארכות
# overtime_periods = 1
# until_clock = 2880 + (overtime_periods * 300)

# קריאה לפונקציה
ttp = calculate_minutes_played(bnk_pbp, until_clock)
print(ttp)


            Player  MinutesPlayed
0             Hill      37.433333
1            Kelly      33.550000
2     Ma. Williams      32.816667
3          Roberts      31.616667
4              Lin      29.750000
5            Davis      24.733333
6           Zeller      22.700000
7        Jefferson      20.816667
8          Biyombo      17.133333
9          Johnson      11.200000
10  Kidd-Gilchrist      10.700000
11           Sacre       9.933333
12       Henderson       8.550000
13        Clarkson       8.116667
14          Boozer       6.333333
15          Taylor       6.000000
16    Mo. Williams       3.750000


In [11]:
from nba_api.stats.endpoints import leaguegamelog

# Fetch game logs for the 2014-15 season
game_logs = leaguegamelog.LeagueGameLog(season='2014-15').get_data_frames()[0]

# Display a few game IDs
print(game_logs[['GAME_ID', 'TEAM_ABBREVIATION', 'GAME_DATE']].head())


      GAME_ID TEAM_ABBREVIATION   GAME_DATE
0  0021400001               ORL  2014-10-28
1  0021400001               NOP  2014-10-28
2  0021400003               HOU  2014-10-28
3  0021400003               LAL  2014-10-28
4  0021400002               SAS  2014-10-28


In [13]:
import pandas as pd
from nba_api.stats.endpoints import playbyplayv2

# Load the first 100 and last 100 rows from nba_2015_shots.csv
shots_df = pd.read_csv('/Users/home/Shotpred/data/External/nba_2015_shots.csv')
subset_shots_df = pd.concat([shots_df.head(100), shots_df.tail(100)])

# Select only the required columns
filtered_shots_df = subset_shots_df[['PLAYER_ID', 'GAME_ID', 'LOC_X', 'LOC_Y', 'QUARTER', 'MINS_LEFT', 'SECS_LEFT']]

# Add new columns for TIME_ON_COURT and TIME_LEFT
filtered_shots_df['TIME_ON_COURT'] = None
filtered_shots_df['TIME_LEFT'] = None

# Cache for play-by-play data to avoid redundant API calls
pbp_cache = {}

# Function to fetch play-by-play data and cache it
def get_play_by_play(game_id):
    if game_id not in pbp_cache:
        try:
            pbp_cache[game_id] = playbyplayv2.PlayByPlayV2(game_id=game_id).get_data_frames()[0]
            print(f"Fetched play-by-play data for game_id {game_id}")
        except Exception as e:
            print(f"Error fetching data for game_id {game_id}: {e}")
            pbp_cache[game_id] = None
    return pbp_cache[game_id]

# Function to calculate TIME_ON_COURT for a single shot
def calculate_time_on_court(game_id, player_id, quarter, mins_left, secs_left, player_times):
    pbp = get_play_by_play(game_id)
    if pbp is None:
        return None

    # Reverse the play-by-play data to start from the beginning of the game
    pbp = pbp[::-1]

    # Convert game clock into seconds since the beginning of the game
    total_time_seconds = (quarter - 1) * 12 * 60 + (12 * 60 - (mins_left * 60 + secs_left))

    # Track when the player entered the game
    player_events = pbp[(pbp['PLAYER1_ID'] == player_id) | (pbp['PLAYER2_ID'] == player_id)]

    if player_id not in player_times:
        player_times[player_id] = 0  # Initialize player's time on court

    is_on_court = False
    for _, event in player_events.iterrows():
        try:
            event_time = (event['PERIOD'] - 1) * 12 * 60 + (
                12 * 60 - int(event['PCTIMESTRING'].split(':')[0]) * 60 - int(event['PCTIMESTRING'].split(':')[1]))
        except (ValueError, KeyError):
            continue

        if event['EVENTMSGTYPE'] == 8:  # Substitution event
            if event['PLAYER1_ID'] == player_id:  # Player substituted out
                is_on_court = False
            elif event['PLAYER2_ID'] == player_id:  # Player substituted in
                is_on_court = True

        if is_on_court and event_time <= total_time_seconds:
            player_times[player_id] = event_time

    return player_times[player_id]

# Function to calculate TIME_LEFT
def calculate_time_left(quarter, mins_left, secs_left):
    # Total game time in seconds
    total_game_time = 48 * 60  # 48 minutes in seconds

    # Time passed in seconds
    time_passed = (quarter - 1) * 12 * 60 + (12 * 60 - (mins_left * 60 + secs_left))

    # Time left in seconds
    time_left_seconds = total_game_time - time_passed

    # Convert to minutes and seconds
    minutes_left = time_left_seconds // 60
    seconds_left = time_left_seconds % 60
    return f"{int(minutes_left)}:{int(seconds_left):02}"

# Process the shots in the dataset
prev_game_id = None
player_times = {}

for idx, row in filtered_shots_df.iterrows():
    # Transform GAME_ID to match NBA API format
    game_id = f"00214{str(row['GAME_ID'])[-5:]}"
    player_id = row['PLAYER_ID']
    quarter = row['QUARTER']
    mins_left = row['MINS_LEFT']
    secs_left = row['SECS_LEFT']

    # Print debug output for each row
    print(f"Processing new game: {game_id}")

    # Reset player times at the start of a new game
    if game_id != prev_game_id:
        player_times = {}
        prev_game_id = game_id
        pbp = get_play_by_play(game_id)
        if pbp is None:
            continue

    # Calculate TIME_ON_COURT
    time_on_court = calculate_time_on_court(
        game_id, player_id, quarter, mins_left, secs_left, player_times
    )
    if time_on_court is not None:
        time_on_court_minutes = time_on_court / 60
        filtered_shots_df.loc[idx, 'TIME_ON_COURT'] = round(time_on_court_minutes, 2)
    else:
        filtered_shots_df.loc[idx, 'TIME_ON_COURT'] = None

    # Calculate TIME_LEFT
    time_left = calculate_time_left(quarter, mins_left, secs_left)
    filtered_shots_df.loc[idx, 'TIME_LEFT'] = time_left

# Save the resulting dataset to a CSV file
filtered_shots_df.to_csv('/Users/home/Downloads/nba_2015_shots_first_last_100_with_time.csv', index=False)

# Print the final dataset
print(filtered_shots_df)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_shots_df['TIME_ON_COURT'] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_shots_df['TIME_LEFT'] = None


Processing new game: 0021401229
Fetched play-by-play data for game_id 0021401229
Processing new game: 0021401229
Processing new game: 0021401225
Fetched play-by-play data for game_id 0021401225
Processing new game: 0021401229
Processing new game: 0021401229
Processing new game: 0021401229
Processing new game: 0021401229
Processing new game: 0021401230
Fetched play-by-play data for game_id 0021401230
Processing new game: 0021401222
Fetched play-by-play data for game_id 0021401222
Processing new game: 0021401222
Processing new game: 0021401230
Processing new game: 0021401225
Processing new game: 0021401225
Processing new game: 0021401230
Processing new game: 0021401222
Processing new game: 0021401219
Fetched play-by-play data for game_id 0021401219
Processing new game: 0021401222
Processing new game: 0021401230
Processing new game: 0021401230
Processing new game: 0021401222
Processing new game: 0021401222
Processing new game: 0021401230
Processing new game: 0021401229
Processing new game