In [None]:
import pandas as pd

In [None]:
# Load the raw event log and keep only the PASS events.
event_data = pd.read_csv("/Users/annadaugaard/Desktop/VFF/raw_data/sample_match_1/Sample_Game_1_RawEventsData.csv")

# .copy() detaches the filtered rows from `event_data`, so the column
# assignments below modify a real frame instead of a possible view
# (avoids SettingWithCopyWarning / chained-assignment surprises).
event_data_passes = event_data[event_data["Type"] == "PASS"].copy()

# Strip the literal "Player" prefix from the passer/receiver labels and cast
# the remainder to int (values are expected to look like "Player7").
for player_col in ("From", "To"):
    event_data_passes[player_col] = (
        event_data_passes[player_col]
        .astype(str)
        .str.replace("Player", "", regex=False)
        .astype(int)
    )

# Scale coordinates: x by 106 and y by 68.
# NOTE(review): presumably converts normalised [0, 1] coordinates to a
# 106 x 68 pitch - confirm against the data provider's documentation.
for x_col in ("Start X", "End X"):
    event_data_passes[x_col] = event_data_passes[x_col].astype(float) * 106
for y_col in ("Start Y", "End Y"):
    event_data_passes[y_col] = event_data_passes[y_col].astype(float) * 68

In [None]:
# Show the cleaned pass events as this cell's output.
event_data_passes

In [None]:
# Load the labelled player and ball tracking tables; the first CSV column is
# used as the row index.
# Later cells read `time`, `id`, `x`, `y` from `players` and
# `time`, `ball_x`, `ball_y` from `ball`.
# NOTE(review): absolute, user-specific paths - the notebook only runs on the
# author's machine as written.
players = pd.read_csv("/Users/annadaugaard/Desktop/VFF/explore/labelled_match_players.csv", index_col=0)
ball = pd.read_csv("/Users/annadaugaard/Desktop/VFF/explore/labelled_match_ball.csv", index_col=0)

In [None]:
# Show the ball tracking table as this cell's output.
ball

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Pair every player row with the ball row that shares its timestamp
merged_df = pd.merge(players, ball, on="time")

# Flag player rows whose Euclidean distance to the ball is at most 1 unit
dx = merged_df["x"] - merged_df["ball_x"]
dy = merged_df["y"] - merged_df["ball_y"]
merged_df["within_1_of_ball"] = np.sqrt(dx**2 + dy**2) <= 1



In [None]:
# Show the merged player/ball table as this cell's output.
merged_df

In [None]:
def plot_match(time_index, distance_threshold=1.0):
    """
    Plot the match state for one ball frame and report the closest player.

    Reads the notebook-level `players` and `ball` frames. All players at the
    frame's timestamp are drawn, those within `distance_threshold` of the ball
    are highlighted, and the id of the closest player is printed and shown in
    the title.

    Parameters
    ----------
    time_index : int
        Positional (0-based) row number in `ball`. The same row supplies both
        the ball position and the timestamp used to select players.
    distance_threshold : float, default 1.0
        Players at most this far from the ball are highlighted.

    Raises
    ------
    ValueError
        If no player has a usable position at the frame's timestamp.
    """
    # Use one positional lookup for everything; mixing .loc (label) and .iloc
    # (position) can read two different rows when the index is not 0..n-1.
    ball_at_time = ball.iloc[time_index]
    frame_time = ball_at_time["time"]

    # .copy() so the distance column is added to an independent frame rather
    # than to a slice of `players`.
    players_at_time = players[players["time"] == frame_time].copy()

    # Compute distances from ball
    players_at_time["distance_to_ball"] = np.sqrt(
        (players_at_time["x"] - ball_at_time["ball_x"]) ** 2
        + (players_at_time["y"] - ball_at_time["ball_y"]) ** 2
    )

    # Also True for an empty selection, which would otherwise fail inside
    # the minimum lookup with an unhelpful message.
    if players_at_time["distance_to_ball"].isna().all():
        raise ValueError(f"No player positions available at time {frame_time}")

    # Identify closest player (by position, so duplicate index labels are safe)
    closest_position = np.nanargmin(players_at_time["distance_to_ball"].to_numpy())
    closest_player_id = players_at_time["id"].to_numpy()[closest_position]
    print(f"Closest player to the ball at time {ball_at_time['time']}: Player {int(closest_player_id)}")

    # Players close enough to the ball to be highlighted
    highlighted = players_at_time[players_at_time["distance_to_ball"] <= distance_threshold]

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(players_at_time["x"], players_at_time["y"], c="blue", label="Players")
    ax.scatter(highlighted["x"], highlighted["y"], c="red", label="Close to Ball", edgecolors="black", s=100)
    ax.scatter(ball_at_time["ball_x"], ball_at_time["ball_y"], c="green", label="Ball", marker="*", s=200)

    # Labels and title
    ax.set_xlabel("X Coordinate")
    ax.set_ylabel("Y Coordinate")
    ax.set_title(f"Match State at Time {ball_at_time['time']} (Closest: Player {int(closest_player_id)})")
    ax.legend()
    ax.grid(True)
    plt.show()

# Example usage: plot the ball frame at index 3
plot_match(3)


In [None]:
from tqdm import tqdm

# Step 1: Flag, for every player row, whether the player is within 1 unit of
# the ball at that row's timestamp.

# Ball coordinates indexed by timestamp. `ball_positions` keeps its original
# dict form ({time: {"ball_x": ..., "ball_y": ...}}); building it also raises
# ValueError if a timestamp appears more than once in `ball`.
ball_xy_by_time = ball.set_index("time")[["ball_x", "ball_y"]]
ball_positions = ball_xy_by_time.to_dict("index")

# Vectorised lookup replaces the per-row iterrows loop: each player row gets
# the ball position of its timestamp (NaN when the ball has no such frame).
ball_x_at_row = players["time"].map(ball_xy_by_time["ball_x"])
ball_y_at_row = players["time"].map(ball_xy_by_time["ball_y"])
distance_to_ball = np.sqrt(
    (players["x"] - ball_x_at_row) ** 2 + (players["y"] - ball_y_at_row) ** 2
)

# NaN distances (no ball frame, or missing coordinates) compare as False,
# which matches the previous "not in possession" default.
ball_possession = (distance_to_ball <= 1).tolist()

players["within_1_of_ball"] = ball_possession

# Step 2: Detect passes by comparing every merged row with the row after it.
# A pass is recorded when both rows are flagged `within_1_of_ball`, the ball
# position differs between them, and the player ids differ.
#
# NOTE(review): `merged_df` appears to hold one row per (player, time) pair,
# so adjacent rows are usually two players in the SAME frame (ball position
# identical -> no pass). Only the boundary between two frames can produce a
# hit. Confirm this is the intended definition of a pass.
pass_columns = ["time", "passer", "receiver", "start_x", "start_y", "end_x", "end_y"]

# Stable sort keeps the original row order inside each timestamp, so the
# adjacent-row comparison is deterministic.
merged_df = players.merge(ball, on="time").sort_values("time", kind="stable")
num_rows = len(merged_df)

player_ids = merged_df["id"].to_numpy()
near_ball = merged_df["within_1_of_ball"].to_numpy(dtype=bool)
frame_times = merged_df["time"].to_numpy()
ball_xs = merged_df["ball_x"].to_numpy()
ball_ys = merged_df["ball_y"].to_numpy()

# Element k describes the pair (row k, row k + 1); array comparisons replace
# the Python loop that materialised two rows per iteration.
both_near_ball = near_ball[:-1] & near_ball[1:]
ball_moved = (ball_xs[:-1] != ball_xs[1:]) | (ball_ys[:-1] != ball_ys[1:])
different_players = player_ids[:-1] != player_ids[1:]
is_pass = both_near_ball & ball_moved & different_players

# Explicit columns so an empty result still has the expected schema.
passes_df = pd.DataFrame(
    {
        "time": frame_times[1:][is_pass],
        "passer": player_ids[:-1][is_pass],
        "receiver": player_ids[1:][is_pass],
        "start_x": ball_xs[:-1][is_pass],
        "start_y": ball_ys[:-1][is_pass],
        "end_x": ball_xs[1:][is_pass],
        "end_y": ball_ys[1:][is_pass],
    },
    columns=pass_columns,
)

# Record-list form, kept for any code that still expects `passes`.
passes = passes_df.to_dict("records")

# Display the extracted passes
passes_df.head()




In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Register `progress_apply` on pandas objects (kept for any cell that still
# uses it; the computations below are vectorised and do not need it).
tqdm.pandas(desc="Processing Data")

# One row per player per timestamp, with the ball position attached
merged_df = players.merge(ball, on="time")

# Euclidean distance between player and ball, computed column-wise instead of
# one Python call per row
merged_df["distance_to_ball"] = np.sqrt(
    (merged_df["x"] - merged_df["ball_x"]) ** 2
    + (merged_df["y"] - merged_df["ball_y"]) ** 2
)

# Label rows: 1 if the row's timestamp equals the start time of a PASS event,
# otherwise 0. `isin` does one hashed lookup instead of scanning the whole
# start-time array for every row; NaN start times are dropped so a missing
# value can never match.
# NOTE(review): this relies on exact equality between tracking timestamps and
# event start times - confirm both use the same clock and rounding.
pass_start_times = event_data_passes["Start Time [s]"].dropna().unique()
merged_df["pass_label"] = merged_df["time"].isin(pass_start_times).astype(int)

# Prepare features and labels
features = merged_df[["x", "y", "ball_x", "ball_y", "distance_to_ball"]]
labels = merged_df["pass_label"]

# Random 80/20 train-test split (seeded for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)


In [None]:
# Fit a 50-tree random forest on the training rows (seeded for reproducibility)
print("Training the model...")
clf = RandomForestClassifier(n_estimators=50, random_state=42).fit(X_train, y_train)

# Predict labels for the held-out rows
print("Making predictions...")
y_pred = clf.predict(X_test)

# Metrics as a nested dict, then as a table with one row per report entry
report = classification_report(y_test, y_pred, output_dict=True)
report_df = pd.DataFrame(report).T

In [None]:
# Show the classification report table as this cell's output.
report_df

In [None]:
# Implementing a pass detection filter combining multiple heuristics
# Updated function to handle NaN values and avoid key errors

def _closest_player_id(players_at_time, ball_x, ball_y):
    """Return the `id` of the row nearest to (ball_x, ball_y), or None if no distance is computable."""
    distances = np.sqrt(
        (players_at_time["x"].to_numpy(dtype=float) - ball_x) ** 2
        + (players_at_time["y"].to_numpy(dtype=float) - ball_y) ** 2
    )
    if distances.size == 0 or np.isnan(distances).all():
        return None
    # Positional lookup: unaffected by duplicate index labels.
    return players_at_time["id"].to_numpy()[np.nanargmin(distances)]


def detect_passes(filtered_df, distance_threshold=1.5, speed_threshold=1.0, time_window=2.0, players_df=None):
    """
    Detect passes between consecutive ball frames.

    For each pair of consecutive ball rows (ordered by `time`) a pass is
    recorded when all of the following hold:
    1. the ball speed between the two rows is at least `speed_threshold`;
    2. the player closest to the ball in the first row differs from the
       player closest to the ball in the second row;
    3. the two rows are at most `time_window` apart.

    Parameters
    ----------
    filtered_df : pandas.DataFrame
        Ball frames with columns `time`, `ball_x`, `ball_y`. Rows containing
        any NaN are dropped; rows are sorted by `time` before pairing.
    distance_threshold : float, default 1.5
        NOTE(review): accepted for backward compatibility but not applied -
        no maximum player-to-ball distance is enforced. Decide whether the
        passer, the receiver, or both should be required to be this close.
    speed_threshold : float, default 1.0
        Minimum ball speed (distance units per time unit) for a candidate.
    time_window : float, default 2.0
        Maximum time difference between the two rows.
    players_df : pandas.DataFrame, optional
        Player tracking rows with columns `time`, `id`, `x`, `y`. Defaults to
        the notebook-level `players` frame. Rows containing any NaN are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per detected pass with columns start_time, end_time, passer,
        receiver, start_x, start_y, end_x, end_y, ball_speed. The columns are
        present even when no pass is found.
    """
    columns = [
        "start_time", "end_time", "passer", "receiver",
        "start_x", "start_y", "end_x", "end_y", "ball_speed",
    ]
    if players_df is None:
        players_df = players  # notebook-level tracking frame

    # Drop incomplete frames and order by time; a non-increasing time step
    # would otherwise silently get speed 0 and be skipped.
    ball_frames = filtered_df.dropna().sort_values("time", kind="stable")
    if len(ball_frames) < 2:
        return pd.DataFrame(columns=columns)

    times = ball_frames["time"].to_numpy()
    ball_xs = ball_frames["ball_x"].to_numpy(dtype=float)
    ball_ys = ball_frames["ball_y"].to_numpy(dtype=float)

    # Element k describes the step from frame k to frame k + 1.
    time_diffs = np.diff(times)
    movements = np.sqrt(np.diff(ball_xs) ** 2 + np.diff(ball_ys) ** 2)
    speeds = np.zeros_like(movements)
    np.divide(movements, time_diffs, out=speeds, where=time_diffs > 0)  # speed stays 0 when dt <= 0

    # Only steps that pass the cheap speed/time filters need a player lookup.
    candidates = np.flatnonzero((speeds >= speed_threshold) & (time_diffs <= time_window))
    if candidates.size == 0:
        return pd.DataFrame(columns=columns)

    # Group the relevant player rows by timestamp once, instead of scanning
    # the whole tracking frame for every ball frame.
    needed_times = np.unique(np.concatenate([times[candidates], times[candidates + 1]]))
    tracked = players_df[players_df["time"].isin(needed_times)].dropna()
    players_by_time = {frame_time: group for frame_time, group in tracked.groupby("time")}

    detected_passes = []
    for i in candidates:
        players_before = players_by_time.get(times[i])
        players_after = players_by_time.get(times[i + 1])
        if players_before is None or players_after is None:
            continue

        passer = _closest_player_id(players_before, ball_xs[i], ball_ys[i])
        receiver = _closest_player_id(players_after, ball_xs[i + 1], ball_ys[i + 1])
        if passer is None or receiver is None or passer == receiver:
            continue

        detected_passes.append({
            "start_time": times[i],
            "end_time": times[i + 1],
            "passer": passer,
            "receiver": receiver,
            "start_x": ball_xs[i],
            "start_y": ball_ys[i],
            "end_x": ball_xs[i + 1],
            "end_y": ball_ys[i + 1],
            "ball_speed": speeds[i],
        })

    return pd.DataFrame(detected_passes, columns=columns)


# Run the pass detection filter on the full ball tracking frame, using the
# function's default thresholds
filtered_passes_df = detect_passes(ball)



In [None]:
# Show the detected passes as this cell's output.
filtered_passes_df