In [1]:
import torch
import kagglehub
import pandas as pd
from momentum_model import load_model

In [2]:
path = kagglehub.dataset_download("robbypeery/college-basketball-pbp-23-24")
df = pd.read_csv(path+'/Colorado_pbp.csv')
df.drop(columns=['date','time_remaining_half', 'home_time_out_remaining', 'win_prob', 'naive_win_prob', 'away_time_out_remaining','home_favored_by','total_line','referees','arena_location','arena','attendance','secs_remaining_absolute'], inplace=True)
df.head(5)
df['points_last_min_diff'] = 0

# Iterate through the DataFrame row by row
for idx, row in df.iterrows():
    current_half = row['half']
    current_secs = row['secs_remaining']

    # Filter for plays within the last 60 seconds
    mask = (df['secs_remaining'] >= current_secs - 60) & (df['secs_remaining'] <= current_secs)
    recent_plays = df[mask]

    # Sum points scored by home and away in that window
    curr_points = row['score_diff']
    start_points = recent_plays.head(1)['score_diff'].values[0]
    diff = int(curr_points) - int(start_points)

    df.loc[idx, 'points_next_min_diff'] = diff

In [10]:
import numpy as np

df = df.copy()
df['points_next_min_diff1'] = 0

# Ensure sorting within each game
df.sort_values(['game_id', 'secs_remaining'], ascending=[True, False], inplace=True)
total=0
# Process each game independently
for game_id, group in df.groupby('game_id'):

    times = group['secs_remaining'].values
    ids = group['play_id'].values
    scores = group['score_diff'].values
    result = np.zeros(len(group), dtype=int)

    # Two-pointer approach
    i = 0
    for j in range(len(group)):
        # Move i forward until we get to just under 60 seconds ahead
        while i < len(group) and times[i] >= times[j] - 60:
            i += 1
        result[j] = scores[i - 1] - scores[j] if i > 0 else 0
    df.loc[group.index, 'points_next_min_diff'] = result

8
2292     2371
2293     2371
2294     2362
2295     2362
2296     2344
         ... 
10552    2347
10553    2347
10554    2319
10555    2319
10556    2319
Name: secs_remaining, Length: 209, dtype: int64
2292     2371
2293     2371
2294     2362
2295     2362
2296     2344
         ... 
10552    2347
10553    2347
10554    2319
10555    2319
10556    2319
Name: secs_remaining, Length: 209, dtype: int64
2294     2362
2295     2362
2296     2344
2297     2325
2298     2312
         ... 
10554    2319
10555    2319
10556    2319
10557    2310
10558    2310
Name: secs_remaining, Length: 230, dtype: int64
2294     2362
2295     2362
2296     2344
2297     2325
2298     2312
         ... 
10554    2319
10555    2319
10556    2319
10557    2310
10558    2310
Name: secs_remaining, Length: 230, dtype: int64
2296     2344
2297     2325
2298     2312
2299     2297
2300     2291
         ... 
10554    2319
10555    2319
10556    2319
10557    2310
10558    2310
Name: secs_remaining, Length: 242, d

KeyboardInterrupt: 

In [5]:
model = load_model()

X_cat, X_num, y = model.process_dataframe(df, make_labels=True)

device = next(model.parameters()).device  # <<< Automatically use same device as model!

logits, attention = model(X_cat.to(device).long(), X_num.to(device))
pred_class = torch.argmax(logits, dim=1)

print("Run" if pred_class[0] == 1 else "No Run")

Run


In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Example y_true (real labels) and y_pred (predicted labels)
y_true = y.numpy()  # Assuming y is your labels from dataset (torch tensor)
y_pred = pred_class.cpu().numpy()  # Make sure to move to CPU if needed

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 0.3854
Precision: 0.7387
Recall: 0.2569
F1 Score: 0.3812
