In [None]:
def supervised_hit_bounce_detection(json_file_path, model_path='trained_model.pkl',
                                    scaler_path='scaler.pkl',
                                    label_encoder_path='label_encoder.pkl',
                                    feature_cols_path='feature_columns.pkl'):
    """
    Supervised hit and bounce detection for a single point.

    Loads a ball-tracking JSON file, engineers features with
    ``calculate_optimized_features``, applies the persisted scaler and model,
    and returns the per-frame data enriched with the predicted action. A
    summary (and, when ground-truth labels are present, accuracy and a
    classification report) is printed to stdout.

    The JSON file is expected to hold an object whose keys are frame indices
    (strings convertible with ``int``) and whose values are objects carrying
    ``x``, ``y``, ``visible`` and optionally ``action``.

    Args:
        json_file_path: Path to ball tracking JSON file
        model_path: Path to trained model (.pkl)
        scaler_path: Path to fitted scaler (.pkl)
        label_encoder_path: Path to label encoder (.pkl)
        feature_cols_path: Path to feature column names (.pkl)

    Returns:
        enriched_data: Dictionary keyed by the original frame keys. Each value
        holds ``x``, ``y``, ``visible``, ``action`` ("unknown" when absent),
        ``pred_action`` and ``confidence`` (1.0 when the model does not expose
        ``predict_proba``). An empty dictionary is returned when the file
        contains no frames.

    Raises:
        ValueError: If the number of predictions does not match the number of
            frames (i.e. feature engineering changed the row count), since
            labels could then no longer be attached to the right frame.
    """

    print(f"\n{'='*70}")
    print("SUPERVISED HIT/BOUNCE DETECTION")
    print(f"{'='*70}")
    print(f"Input file: {json_file_path}")

    # ========================================================================
    # STEP 1: Load the JSON data
    # ========================================================================

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(json_file_path, 'r', encoding='utf-8') as f:
        ball_data = json.load(f)

    print(f"Loaded {len(ball_data)} frames")

    # Frame keys are strings in JSON, so sort numerically rather than lexically.
    frames = sorted(ball_data, key=int)

    if not frames:
        # Nothing to enrich; bail out before touching the model artefacts.
        print("No frames found - nothing to predict.")
        return {}

    # ========================================================================
    # STEP 2: Convert to DataFrame
    # ========================================================================

    rows = [
        {
            "frame": int(frame_idx),
            "x": ball_data[frame_idx].get("x"),
            "y": ball_data[frame_idx].get("y"),
            "visible": ball_data[frame_idx].get("visible"),
            # May not exist in test data
            "action": ball_data[frame_idx].get("action", "unknown"),
        }
        for frame_idx in frames
    ]

    df = pd.DataFrame(rows)
    print(f"DataFrame shape: {df.shape}")

    # ========================================================================
    # STEP 3: Feature Engineering
    # ========================================================================

    print("Calculating features...")
    df_features = calculate_optimized_features(df)
    print(f"Features calculated: {len(df_features.columns)} columns")

    # ========================================================================
    # STEP 4: Load trained model and preprocessors
    # ========================================================================

    # NOTE: joblib.load unpickles arbitrary Python objects. Only point these
    # paths at artefacts produced by a trusted training run.
    print("Loading trained model...")
    model = joblib.load(model_path)
    scaler = joblib.load(scaler_path)
    label_encoder = joblib.load(label_encoder_path)
    feature_cols = joblib.load(feature_cols_path)

    print(f"Model loaded: {type(model).__name__}")
    print(f"Expected features: {len(feature_cols)}")

    # ========================================================================
    # STEP 5: Prepare features for prediction
    # ========================================================================

    # Select only the features used during training; missing values become 0.
    X = df_features[feature_cols].fillna(0)

    # Scale features and restore the column names for the model.
    X_scaled = scaler.transform(X)
    X_scaled = pd.DataFrame(X_scaled, columns=feature_cols)

    print(f"Feature matrix shape: {X_scaled.shape}")

    # ========================================================================
    # STEP 6: Make predictions
    # ========================================================================

    print("Making predictions...")
    y_pred_encoded = model.predict(X_scaled)
    y_pred = np.asarray(label_encoder.inverse_transform(y_pred_encoded))

    # Predictions are matched to frames by position, so the counts must agree.
    if len(y_pred) != len(frames):
        raise ValueError(
            f"Got {len(y_pred)} predictions for {len(frames)} frames; "
            "feature engineering must preserve one row per frame."
        )

    # Get prediction probabilities (if available)
    if hasattr(model, 'predict_proba'):
        y_pred_proba = model.predict_proba(X_scaled)
        confidence = y_pred_proba.max(axis=1)
    else:
        confidence = np.ones(len(y_pred))

    # Native Python values keep the returned dictionary JSON-serialisable
    # whatever label dtype the encoder was fitted on.
    pred_labels = y_pred.tolist()

    # ========================================================================
    # STEP 7: Create enriched JSON
    # ========================================================================

    enriched_data = {}

    for frame_idx, label, conf in zip(frames, pred_labels, confidence):
        details = ball_data[frame_idx]
        enriched_data[frame_idx] = {
            "x": details.get("x"),
            "y": details.get("y"),
            "visible": details.get("visible"),
            "action": details.get("action", "unknown"),
            "pred_action": label,
            "confidence": float(conf)
        }

    # ========================================================================
    # STEP 8: Summary statistics
    # ========================================================================

    print(f"\n{'='*70}")
    print("PREDICTION SUMMARY")
    print(f"{'='*70}")

    pred_counts = pd.Series(y_pred).value_counts()
    print("\nPredicted distribution:")
    for action, count in pred_counts.items():
        # str() so that non-string labels do not break the 's' format spec.
        print(f"  {str(action):8s}: {count:5d} frames ({100*count/len(y_pred):.2f}%)")

    print(f"\nMean confidence: {confidence.mean():.4f}")
    print(f"Min confidence:  {confidence.min():.4f}")
    print(f"Max confidence:  {confidence.max():.4f}")

    # If ground truth exists, calculate accuracy. Only frames that actually
    # carry a label are scored: a missing key, a null, or the "unknown"
    # placeholder all mean "no ground truth for this frame".
    truth = [ball_data[f].get("action", "unknown") for f in frames]
    labelled = [i for i, a in enumerate(truth) if a is not None and a != "unknown"]

    if labelled:
        y_true = [truth[i] for i in labelled]
        y_eval = [pred_labels[i] for i in labelled]
        accuracy = np.mean([yt == yp for yt, yp in zip(y_true, y_eval)])
        print(f"\nAccuracy (vs ground truth): {accuracy:.4f}")

        from sklearn.metrics import classification_report
        print("\nClassification Report:")
        print(classification_report(y_true, y_eval, zero_division=0))

    print(f"{'='*70}\n")

    return enriched_data
