In [None]:
import pandas as pd

def find_scoring_runs(df, threshold=6):
    """
    Find runs in which one team scores at least `threshold` unanswered points.

    Plays are processed game by game in ascending `play_id` order. A scoring
    play is any row whose `home_score` or `away_score` differs from the last
    known score (both start at 0 for every game). A team's running total is
    reset to zero as soon as the opponent scores. Once a total reaches
    `threshold` the run is recorded and the total is reset, so a long streak
    is reported as several consecutive runs.

    If both scores change on the same row, only the home team's change is
    counted for that row.

    Parameters:
      - df: DataFrame with columns game_id, play_id, home, away, home_score,
        away_score. It is not modified. Rows with a missing home_score or
        away_score are ignored (the last known score is carried forward).
      - threshold: minimum number of unanswered points that makes a run.

    Returns a DataFrame with one row per run and the columns game_id, team,
    start_play_id, end_play_id. The columns are present even when no run is
    found.
    """
    columns = ['game_id', 'team', 'start_play_id', 'end_play_id']
    runs = []
    df = df.sort_values(['game_id', 'play_id']).reset_index(drop=True)

    for game_id, g in df.groupby('game_id'):
        home, away = g.iloc[0]['home'], g.iloc[0]['away']
        # NaN never compares equal, so a row with a missing score would look
        # like a scoring play and turn the running totals into NaN.
        g = g.dropna(subset=['home_score', 'away_score'])

        home_score = away_score = 0
        run_points = {home: 0, away: 0}
        run_start_play_id = None

        scoreboard = zip(g['play_id'], g['home_score'], g['away_score'])
        for play_id, h_new, a_new in scoreboard:
            if h_new != home_score or a_new != away_score:
                if h_new != home_score:
                    team_scored, opp = home, away
                    pts = h_new - home_score
                else:
                    team_scored, opp = away, home
                    pts = a_new - away_score

                # A zero total means this play opens a new run for the team.
                if run_points[team_scored] == 0:
                    run_start_play_id = play_id
                run_points[team_scored] += pts
                # The opponent's streak is broken by this score.
                run_points[opp] = 0

                if run_points[team_scored] >= threshold:
                    runs.append({
                        'game_id': game_id,
                        'team': team_scored,
                        'start_play_id': int(run_start_play_id),
                        'end_play_id': int(play_id),
                    })
                    run_points[team_scored] = 0
                    run_start_play_id = None

            home_score, away_score = h_new, a_new

    # Explicit columns keep the documented schema when `runs` is empty.
    return pd.DataFrame(runs, columns=columns)



def label_runs(df, runs_df):
    """
    Flag the plays that belong to scoring runs.

    Parameters:
      - df: DataFrame with at least the columns game_id and play_id.
      - runs_df: DataFrame with one row per run and the columns game_id,
        start_play_id and end_play_id.

    Returns a copy of `df`, sorted by game_id and play_id with a fresh
    index, with two added integer columns:
      - run_start: 1 only on the very first play of each run
      - run_in_process: 1 for every play from start_play_id to end_play_id
        inclusive
    Both columns are 0 on every other play. The caller's `df` is not
    modified.
    """
    # sort_values returns a new frame, so the assignments below never touch
    # the DataFrame that was passed in.
    df = df.sort_values(['game_id', 'play_id']).reset_index(drop=True)
    df['run_start'] = 0
    df['run_in_process'] = 0

    for _, run in runs_df.iterrows():
        # play_id values are only compared within the run's own game.
        in_game = df['game_id'] == run.game_id
        in_span = (
            in_game &
            (df['play_id'] >= run.start_play_id) &
            (df['play_id'] <= run.end_play_id)
        )
        df.loc[in_span, 'run_in_process'] = 1

        # only the first play gets run_start
        df.loc[in_game & (df['play_id'] == run.start_play_id), 'run_start'] = 1

    return df