In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the labelled ball-tracking table and the raw event log for the sample match.
# NOTE(review): these are absolute, machine-specific paths; consider a configurable data directory.
ball = pd.read_csv("/Users/annadaugaard/Desktop/VFF/explore/labelled_match_ball_match.csv", index_col=0)
event_data = pd.read_csv("/Users/annadaugaard/Desktop/VFF/raw_data/sample_match_1/Sample_Game_1_RawEventsData.csv")
# Keep only PASS events. The explicit copy makes this an independent frame, so the
# column assignments performed on it later do not write into a slice of `event_data`
# (which would raise SettingWithCopyWarning).
event_data_passes = event_data[event_data["Type"] == "PASS"].copy()



In [None]:
len(event_data)

In [None]:
event_data_passes

In [None]:
# Build a new frame instead of assigning columns onto the filtered slice, which
# avoids SettingWithCopyWarning regardless of how `event_data_passes` was created.
#   - "From"/"To": strip the "Player" prefix and keep the numeric id as int.
#   - X/Y: scale the coordinates by 106 and 68 respectively
#     (presumably normalised pitch coordinates converted to metres -- TODO confirm).
# NOTE: this cell is not idempotent; re-running it scales the coordinates again.
event_data_passes = event_data_passes.assign(**{
    "From": event_data_passes["From"].astype(str).str.replace("Player", "", regex=True).astype(int),
    "To": event_data_passes["To"].astype(str).str.replace("Player", "", regex=True).astype(int),
    "Start X": event_data_passes["Start X"].astype(float) * 106,
    "End X": event_data_passes["End X"].astype(float) * 106,
    "Start Y": event_data_passes["Start Y"].astype(float) * 68,
    "End Y": event_data_passes["End Y"].astype(float) * 68,
})

In [None]:
len(ball)

In [None]:

# Drop incomplete samples so the finite differences below are taken between valid rows.
ball = ball.dropna()

# Frame-to-frame displacement and elapsed time.
ball['dx'] = ball['ball_x'].diff()
ball['dy'] = ball['ball_y'].diff()
ball['dt'] = ball['time'].diff()

# Speed: Euclidean distance travelled divided by the elapsed time.
ball['speed'] = np.sqrt(ball['dx']**2 + ball['dy']**2) / ball['dt']

# Acceleration: change in speed over the same elapsed time (reuses `dt`
# rather than recomputing the identical time difference).
ball['acceleration'] = ball['speed'].diff() / ball['dt']

# Keep rows whose speed is at most 36 (rows with NaN speed, such as the first one,
# are dropped by the comparison) and remove the helper columns. Chaining returns a
# new frame, so nothing is dropped in place on a filtered slice.
ball = ball[ball['speed'] <= 36].drop(columns=['dx', 'dy', 'dt'])

In [None]:
len(ball)

In [None]:
len(ball)

In [None]:

def filter_pass_events_by_timestamp_coverage(pass_df, time_df, step=0.04, missing_threshold=0.30):
    """
    Keep only the pass events whose time span is sufficiently covered by tracking data.

    For each pass event in pass_df, the expected timestamps from "Start Time [s]" to
    "End Time [s]" (inclusive) are generated every 'step' seconds and rounded to two
    decimals. The event is kept when the fraction of those timestamps that are absent
    from time_df["time"] (also rounded to two decimals) is at most missing_threshold.
    An event with no expected timestamps (end before start) is kept.

    Parameters:
      pass_df (pd.DataFrame): Pass events with "Start Time [s]" and "End Time [s]" columns.
      time_df (pd.DataFrame): Available timestamps in a column named "time".
      step (float): Spacing of the expected timestamps (default 0.04).
      missing_threshold (float): Maximum allowed fraction of missing timestamps (default 0.30).

    Returns:
      pd.DataFrame: A copy of the rows of pass_df that pass the check. The original
      index, columns and dtypes are preserved, also when no event is kept.
    """
    # Round to 2 decimals so float noise does not break the exact membership test.
    available_times = set(np.round(time_df["time"].values, 2))

    keep = []
    for start, end in zip(pass_df["Start Time [s]"], pass_df["End Time [s]"]):
        # The half-step margin makes the end point part of the range despite
        # floating-point rounding in np.arange.
        expected_times = np.round(np.arange(start, end + step / 2, step), 2)
        expected_count = len(expected_times)
        found_count = sum(1 for t in expected_times if t in available_times)
        missing_fraction = 1 - (found_count / expected_count) if expected_count > 0 else 0
        keep.append(missing_fraction <= missing_threshold)

    # Select with a boolean mask rather than rebuilding a frame from row Series:
    # that keeps the column set (even for an empty result) and the column dtypes.
    return pass_df[np.asarray(keep, dtype=bool)].copy()


# Keep only the labelled passes with no missing expected timestamp
# (missing_threshold=0.0) in the cleaned ball data, then show them.
filtered_pass_df = filter_pass_events_by_timestamp_coverage(
    pass_df=event_data_passes,
    time_df=ball,
    missing_threshold=0.0,
    step=0.04,
)

print("Filtered Pass Events:", filtered_pass_df, sep="\n")

In [None]:
# Kernel density estimate of the ball speed that remains after the outlier filter.
import matplotlib.pyplot as plt

import seaborn as sns
plt.figure(figsize=(10, 6))
sns.kdeplot(data=ball, x='speed', fill=True, color='skyblue')
# Title and x-label name the plotted quantity (previously the 'my_column' placeholder).
plt.title('Density Plot of Ball Speed')
plt.xlabel('Speed')
plt.ylabel('Density')
plt.show()

In [None]:
def kalman_filter_speed(speed_data, process_variance=1e-3, measurement_variance=0.1):
    """
    Apply a simple one-dimensional Kalman filter (constant-state model) to a 1D signal.

    Non-finite measurements (NaN/inf) are treated as missing: the filter only
    performs the prediction step for them, so a single NaN no longer turns every
    later estimate into NaN. Entries before the first finite measurement have no
    estimate and are returned as NaN.

    Parameters:
      speed_data (array-like): The measurements (list, ndarray or Series values).
      process_variance (float): Variance of the process noise.
      measurement_variance (float): Variance of the measurement noise.

    Returns:
      np.array: The filtered estimates, same length as speed_data
      (an empty array for empty input).
    """
    measurements = np.asarray(speed_data, dtype=float)
    n = len(measurements)
    xhat = np.full(n, np.nan)  # Filtered estimate (NaN until initialised).
    P = np.zeros(n)            # Estimate error covariance.

    finite = np.isfinite(measurements)
    if not finite.any():
        # Empty input or no usable measurement: nothing to filter.
        return xhat

    # Initialise with the first usable measurement.
    first = int(np.argmax(finite))
    xhat[first] = measurements[first]
    P[first] = 1.0  # initial uncertainty

    for k in range(first + 1, n):
        # Prediction step: the state is assumed not to change.
        xhatminus = xhat[k-1]
        Pminus = P[k-1] + process_variance

        if not finite[k]:
            # Missing measurement: carry the prediction forward, uncertainty grows.
            xhat[k] = xhatminus
            P[k] = Pminus
            continue

        # Update step: blend prediction and measurement using the Kalman gain.
        K = Pminus / (Pminus + measurement_variance)
        xhat[k] = xhatminus + K * (measurements[k] - xhatminus)
        P[k] = (1 - K) * Pminus

    return xhat

# Apply the Kalman filter to the ball acceleration and store it as a new column.
ball['filtered_acceleration'] = kalman_filter_speed(ball['acceleration'].values, process_variance=1e-3, measurement_variance=0.01)


# Plot the first 200 samples of the raw acceleration with reference lines at +/-20.
# .iloc makes the positional selection explicit (the index has gaps after filtering).
plt.figure(figsize=(12,6))
plt.plot(ball['time'].iloc[0:200], ball['acceleration'].iloc[0:200], label='Original Acceleration', marker='o', linestyle='--', alpha=0.7)
#plt.plot(ball['time'].iloc[0:200], ball['filtered_acceleration'].iloc[0:200], label='Filtered Acceleration', linewidth=2)
plt.xlabel("Time")
plt.ylabel("acceleration")
plt.title("Kalman Filter Applied to Ball Acceleration")
plt.axhline(y=20, color='red', linestyle='--', linewidth=1.5)
plt.axhline(y=-20, color='red', linestyle='--', linewidth=1.5)

plt.legend()
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Raw ball acceleration for the first 200 samples, with horizontal reference
# lines at +20 and -20.
plt.figure(figsize=(14, 7))
plt.plot(
    ball['time'][0:200], 
    ball['acceleration'][0:200], 
    label='Original Acceleration', 
    marker='o', 
    linestyle='--', 
    alpha=0.6, 
    linewidth=2, 
    markersize=4
)

# Threshold lines (only the first carries a label, so the legend shows one entry)
plt.axhline(y=20, color='crimson', linestyle='--', linewidth=2, label='±20m/s² Threshold for kicks and recieval of the ball')
plt.axhline(y=-20, color='crimson', linestyle='--', linewidth=2)

# Labels and title
# NOTE(review): the title says "first 10 seconds" but the data shown are the first
# 200 rows; confirm the time span matches.
plt.xlabel("Time (s)", fontsize=14)
plt.ylabel("Acceleration (m/s²)", fontsize=14)
plt.title("Acceleration of the ball first 10 seconds", fontsize=16, fontweight='bold')

# Grid and legend
plt.grid(True, linestyle='--', alpha=0.3)
plt.legend(fontsize=12)
plt.tight_layout()

# Show the plot
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Marker/text colours: color1 for pass starts ("kicked"), color2 for pass ends ("recieved").
color1= "blue"
color2 = "darkviolet"
# Create figure and primary axis
fig, ax1 = plt.subplots(figsize=(14, 7))

# --- Plot the full acceleration series on the primary y-axis (left) ---
ax1.plot(
    ball['time'], 
    ball['acceleration'], 
    label='Original Acceleration', 
    marker='o', 
    linestyle='--', 
    alpha=0.6, 
    linewidth=2, 
    markersize=4,
    color='dodgerblue'
)
# Labels and grid
ax1.set_xlabel("Time (s)", fontsize=14)
ax1.set_ylabel("Acceleration (m/s²)", fontsize=14, color='black')
ax1.tick_params(axis='y', labelcolor='black')
ax1.grid(True, linestyle='--', alpha=0.3)

# --- Scatter markers and annotations ---

# Start/end times of two labelled passes (slice [2:4] of the pass table).
start_times = event_data_passes['Start Time [s]'][2:4]
end_times = event_data_passes['End Time [s]'][2:4]

# Interpolate the acceleration curve at those times to get the marker heights.
# NOTE(review): np.interp expects ball['time'] to be increasing and does not skip
# NaN accelerations -- confirm both hold for this data.
start_y = np.interp(start_times, ball['time'], ball['acceleration'])
end_y = np.interp(end_times, ball['time'], ball['acceleration'])

# Scatter markers
ax1.scatter(start_times, start_y, color=color1, label='Start Time', s=60, zorder=5)
ax1.scatter(end_times, end_y, color=color2, label='End Time', s=60, zorder=5)

# Add text labels at 45 degrees, offset from the markers by fixed amounts
for x, y in zip(start_times, start_y):
    ax1.text(x-0.11, y + -60.5, 'kicked', rotation=45, color=color1, fontsize=10)

for x, y in zip(end_times, end_y):
    ax1.text(x, y + 20.5, 'recieved', rotation=45, color=color2, fontsize=10)

# Title (the legend is currently disabled below, so the labels above are not shown)
plt.title("Ball Acceleration with markers for a kick (start of pass) and receival (end of pass)", fontsize=16, fontweight='bold')

# Combine and show legend
#lines_1, labels_1 = ax1.get_legend_handles_labels()
#ax1.legend(lines_1, labels_1, loc='upper right', fontsize=12)

plt.tight_layout()
plt.show()


In [None]:
# Number of samples in the centred moving-average window.
window_size = 5

# Centred rolling mean of the acceleration; the window edges (and any window
# containing a NaN) yield NaN.
ball['smoothed_acceleration'] = ball['acceleration'].rolling(window=window_size, center=True).mean()

# Compare raw and smoothed acceleration for the first 200 samples, with
# reference lines at +5 and -5.
plt.figure(figsize=(10,6))
plt.plot(ball['time'][0:200], ball['acceleration'][0:200], label='Original Acceleration', alpha=0.6)
plt.plot(ball['time'][0:200], ball['smoothed_acceleration'][0:200], label='Smoothed (Moving Average)', linewidth=2)
plt.axhline(y=5, color='red', linestyle='--', linewidth=1.5)
plt.axhline(y=-5, color='red', linestyle='--', linewidth=1.5)
plt.xlabel("Time")
plt.ylabel("Acceleration")
plt.legend()
plt.title("Acceleration Smoothing with Moving Average")
plt.show()


In [None]:
import matplotlib.pyplot as plt

# Set moving average window and recompute the smoothed column
# (same computation as the previous cell, repeated so this cell runs on its own).
window_size = 5
ball['smoothed_acceleration'] = ball['acceleration'].rolling(window=window_size, center=True).mean()

# Create the plot: samples 190 to 299 only
plt.figure(figsize=(14, 7))

# Original acceleration
plt.plot(
    ball['time'][190:300], 
    ball['acceleration'][190:300], 
    label='Original Acceleration', 
    linestyle='--', 
    alpha=0.5, 
    linewidth=2, 
    color='gray'
)

# Smoothed acceleration
plt.plot(
    ball['time'][190:300], 
    ball['smoothed_acceleration'][190:300], 
    label=f'Smoothed (Moving Avg, window={window_size})', 
    linewidth=2.5, 
    color='dodgerblue'
)

# Threshold lines (disabled)
#plt.axhline(y=5, color='crimson', linestyle='--', linewidth=1.5, label='±5 Threshold')
#plt.axhline(y=-5, color='crimson', linestyle='--', linewidth=1.5)

# Labels and aesthetics
plt.xlabel("Time (s)", fontsize=14)
plt.ylabel("Acceleration (m/s²)", fontsize=14)
plt.title("Acceleration Smoothing with Moving Average", fontsize=16, fontweight='bold')

# Grid, legend, layout
plt.grid(True, linestyle='--', alpha=0.3)
plt.legend(fontsize=12, loc='upper right')
plt.tight_layout()
plt.show()


In [None]:
# Flag (1/0) the samples whose smoothed acceleration magnitude is at least 5.
# NaN values compare as False and therefore become 0.
ball['smoothed_acceleration_observed'] = (ball['smoothed_acceleration'].abs() >= 5).astype(int)


In [None]:
players = pd.read_csv("/Users/annadaugaard/Desktop/VFF/explore/labelled_match_players_match.csv", index_col=0)

In [None]:
players_and_ball = players.merge(ball, on="time", how="left")

In [None]:
players_and_ball

In [None]:
players_and_ball["smoothed_acceleration"].unique()

In [None]:
players_and_ball

In [None]:
players_and_ball = players_and_ball.dropna()

In [None]:
#players_and_ball = players_and_ball.dropna()
# Compute the Euclidean distance from the player (x, y) to the ball (ball_x, ball_y)
players_and_ball["distance_to_ball"] = np.sqrt((players_and_ball["x"] - players_and_ball["ball_x"])**2 +
                                          (players_and_ball["y"] - players_and_ball["ball_y"])**2)

# For each time point, rank the players by distance (1 = closest).
# method="min" gives tied players the same rank, so several rows can have rank 1.
players_and_ball["distance_rank"] = players_and_ball.groupby("time")["distance_to_ball"].rank(method="min")
# Distance (same unit as x/y) within which a player counts as "near" the ball.
threshold = 3.0

# For each time point, count how many players are within the threshold distance to the ball.
# We use groupby with transform so that every row for the same time gets the same count.
players_and_ball["uncertainty_index"] = players_and_ball.groupby("time")["distance_to_ball"].transform(
    lambda x: (x <= threshold).sum()
)

# Closest player(s) per time point, restricted to the samples flagged by the
# smoothed-acceleration threshold.
rank_1= players_and_ball[players_and_ball["distance_rank"] == 1]
rank_1_index= rank_1[rank_1["smoothed_acceleration_observed"] == 1]

In [None]:
rank_1_index

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Count rows per (time, id) for rows 1..99 of rank_1_index (positional slice;
# the first row is skipped) and pivot ids into columns.
grouped = rank_1_index[1:100].groupby(["time", "id"]).size().unstack(fill_value=0)

# Create a stacked bar plot of counts.
ax = grouped.plot(kind="bar", stacked=True, figsize=(15, 6))
plt.legend(title="Player ID", bbox_to_anchor=(1.05, 1), loc="upper left")

# Compute an uncertainty index per time:
# the maximum uncertainty_index value at each time.
uncertainty_by_time = rank_1_index[1:100].groupby("time")["uncertainty_index"].max()

# Get the unique times (the index of 'grouped'); these serve as our x-axis ticks.
unique_times = grouped.index
x_positions = np.arange(len(unique_times))

# Ensure the uncertainty series is aligned to the same unique_times order.
uncertainty_by_time = uncertainty_by_time.loc[unique_times]

# Draw one horizontal segment per bar, spanning the bar width, at the uncertainty value.
bar_width = 0.8
ax.hlines(y=uncertainty_by_time, 
          xmin=x_positions - bar_width/2, 
          xmax=x_positions + bar_width/2, 
          colors="black", 
          linewidth=2, 
          label="Uncertainty Index", 
          zorder=3)

plt.xlabel("Time [s]")
plt.xticks(x_positions, [f"{t:.2f}" for t in unique_times], rotation=45)
plt.tight_layout()
plt.show()


### If more than 7 seconds elapse between the first timestamp at which one player has the ball and the timestamp at which another player has the ball, do not count it as a pass.

### If there are multiple players close to the closest player, increase the uncertainty score.

### 

In [None]:
import numpy as np
import pandas as pd

def resolve_ties_by_team(df):
    """
    Reduce df to one row per timestamp, resolving ties by team continuity.

    Timestamps are processed in order of first appearance. When several rows share
    a timestamp, a candidate is chosen by the first rule that yields exactly one row:
      1) its "Team" equals both the previously resolved row's team and the team of
         the first row at the next timestamp;
      2) its "Team" equals the previously resolved row's team;
      3) its "Team" equals the team of the first row at the next timestamp;
      4) otherwise the first candidate is taken.

    Parameters:
      df (pd.DataFrame): Rows with at least "time" and "Team" columns.

    Returns:
      pd.DataFrame: One row per distinct timestamp with a fresh 0..n-1 index.
      An empty input is returned unchanged (columns preserved). Rows whose "time"
      is NaN are skipped by the grouping.
    """
    if df.empty:
        return df.reset_index(drop=True)

    # Group once instead of rescanning the whole frame for every timestamp;
    # sort=False keeps the groups in order of first appearance.
    groups = [candidates for _, candidates in df.groupby("time", sort=False)]

    resolved = []
    for i, candidates in enumerate(groups):
        if len(candidates) == 1:
            resolved.append(candidates.iloc[0])
            continue

        # Team context: the last resolved row and the first row of the next timestamp.
        # `is not None` (rather than truthiness) so falsy team labels still count.
        prev_team = resolved[-1]["Team"] if resolved else None
        next_team = groups[i + 1].iloc[0]["Team"] if i < len(groups) - 1 else None

        chosen = None
        # 1) Try matching both prev_team & next_team.
        if prev_team is not None and next_team is not None:
            both = candidates[(candidates["Team"] == prev_team) & (candidates["Team"] == next_team)]
            if len(both) == 1:
                chosen = both.iloc[0]
        # 2) If not, try matching prev_team.
        if chosen is None and prev_team is not None:
            match_prev = candidates[candidates["Team"] == prev_team]
            if len(match_prev) == 1:
                chosen = match_prev.iloc[0]
        # 3) If still not, try matching next_team.
        if chosen is None and next_team is not None:
            match_next = candidates[candidates["Team"] == next_team]
            if len(match_next) == 1:
                chosen = match_next.iloc[0]
        # 4) Fallback: choose the first candidate.
        if chosen is None:
            chosen = candidates.iloc[0]
        resolved.append(chosen)

    return pd.DataFrame(resolved).reset_index(drop=True)

def compress_consecutive_id(df, min_count=3):
    """
    Group consecutive rows with the same id into a single block with start/end times.

    Parameters:
      df (pd.DataFrame): Rows with "id", "Team" and "time" columns, in the order
        in which they should be scanned.
      min_count (int): Minimum number of consecutive observations a block needs
        to be retained (default 3).

    Returns:
      pd.DataFrame: One row per retained block with columns "id", "Team"
      (taken from the block's first row), "start_time", "end_time" and "count".
      The columns are always present, also when no block is retained.
    """
    columns = ["id", "Team", "start_time", "end_time", "count"]
    blocks = []
    current_block = None

    # An empty frame may not even have the expected columns, so skip the scan.
    if not df.empty:
        for player_id, team, t in zip(df["id"], df["Team"], df["time"]):
            if current_block is not None and player_id == current_block["id"]:
                # Same player as the running block: extend it.
                current_block["end_time"] = t
                current_block["count"] += 1
                continue
            # Player changed: close the running block (if long enough) and start a new one.
            if current_block is not None and current_block["count"] >= min_count:
                blocks.append(current_block)
            current_block = {
                "id": player_id,
                "Team": team,
                "start_time": t,
                "end_time": t,
                "count": 1,
            }

    # Close the final block.
    if current_block is not None and current_block["count"] >= min_count:
        blocks.append(current_block)

    return pd.DataFrame(blocks, columns=columns)

def build_pass_events(blocks_df, rank_df, uncertainty_col="uncertainty_index"):
    """
    Create a pass event for each adjacent pair of blocks that belong to the same
    team and to different players.

    Blocks are ordered by "start_time". For each qualifying pair, the event runs from
    the start time of the current block to the start time of the next block, and its
    uncertainty is the mean of rank_df[uncertainty_col] over the rows of rank_df whose
    "time" lies in that closed interval (NaN if there are none).

    Pairs with the same id on both sides are skipped: they arise when a short block
    between two blocks of one player was discarded, and are not passes.

    Parameters:
      blocks_df (pd.DataFrame): Blocks with "id", "Team" and "start_time" columns.
      rank_df (pd.DataFrame): Rows with "time" and the uncertainty column.
      uncertainty_col (str): Name of the uncertainty column in rank_df.

    Returns a DataFrame with columns (always present, also when empty):
      - "Start Time [s]"
      - "End Time [s]"
      - "From"
      - "To"
      - "uncertainty"
      - "Team" (the team for the event)
    """
    columns = ["Start Time [s]", "End Time [s]", "From", "To", "uncertainty", "Team"]
    # An empty block table may lack the "start_time" column entirely.
    if blocks_df.empty:
        return pd.DataFrame(columns=columns)

    blocks_df = blocks_df.sort_values("start_time").reset_index(drop=True)
    events = []
    for i in range(len(blocks_df) - 1):
        current = blocks_df.loc[i]
        following = blocks_df.loc[i + 1]
        # Only create a pass event if both blocks are on the same team.
        if current["Team"] != following["Team"]:
            continue
        # A player cannot pass to themselves.
        if current["id"] == following["id"]:
            continue
        start_time = current["start_time"]
        end_time = following["start_time"]
        # Rows of rank_df with times between start_time and end_time (inclusive).
        subset = rank_df[(rank_df["time"] >= start_time) & (rank_df["time"] <= end_time)]
        uncertainty_value = subset[uncertainty_col].mean() if not subset.empty else np.nan
        events.append({
            "Start Time [s]": start_time,
            "End Time [s]": end_time,
            "From": current["id"],
            "To": following["id"],
            "uncertainty": uncertainty_value,
            "Team": current["Team"],
        })
    return pd.DataFrame(events, columns=columns)

# -------------------------------
# Pass-detection pipeline on 'rank_1_index' (closest player per flagged sample),
# which provides the columns "time", "id", "Team", and "uncertainty_index".

# 1) Resolve ties so that each timestamp has a single closest player.
df_resolved = resolve_ties_by_team(rank_1_index)

# 2) Compress consecutive rows by id but only keep blocks with at least 3 observations.
df_blocks = compress_consecutive_id(df_resolved)

# 3) Build pass events from the valid blocks, computing uncertainty over the interval.
#    Only events where both blocks belong to the same team are kept.
df_passes = build_pass_events(df_blocks, rank_1_index, uncertainty_col="uncertainty_index")

df_passes


In [None]:
# Keep predicted passes whose duration (End Time [s] - Start Time [s]) is at most 10 seconds.
# NOTE(review): the notes elsewhere in this notebook mention a 7-second limit; the code uses 10.
df_filtered = df_passes[(df_passes["End Time [s]"] - df_passes["Start Time [s]"]) <= 10]

In [None]:
df_filtered

In [None]:
event_data_passes_subset = filtered_pass_df[["Start Time [s]", "End Time [s]","From", "To"]]

In [None]:
np.median(event_data_passes_subset["End Time [s]"] - event_data_passes_subset["Start Time [s]"])

In [None]:
#### WEIGHTED TRIANGLE MODEL

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Create a triangle in 2D space
# Points: apex at the ball position (0, 0), left base (-1.5, 3), right base (1.5, 3).
# The apex is named `ball_point` so it does not overwrite the `ball` tracking
# DataFrame used by the earlier cells.
ball_point = np.array([0, 0])
left = np.array([-1.5, 3])
right = np.array([1.5, 3])

# Define a grid over the triangle bounding box
x = np.linspace(-1.5, 1.5, 100)
y = np.linspace(0, 3, 100)
X, Y = np.meshgrid(x, y)

def point_in_triangle(px, py, a, b, c):
    """
    Barycentric test of whether point(s) (px, py) lie inside triangle a-b-c.

    px and py may be scalars or equally shaped arrays; a, b, c are length-2 arrays.
    Returns a boolean (array) that is True on and inside the triangle's edges and
    all False for a degenerate (zero-area) triangle.
    """
    v0 = c - a
    v1 = b - a
    # Components of the vector from a to the query point(s), kept separate so the
    # same arithmetic broadcasts over whole grids.
    v2x = px - a[0]
    v2y = py - a[1]

    dot00 = np.dot(v0, v0)
    dot01 = np.dot(v0, v1)
    dot11 = np.dot(v1, v1)
    dot02 = v0[0] * v2x + v0[1] * v2y
    dot12 = v1[0] * v2x + v1[1] * v2y

    denom = dot00 * dot11 - dot01 * dot01
    if denom == 0:
        return np.zeros(np.shape(px), dtype=bool)

    u = (dot11 * dot02 - dot01 * dot12) / denom
    v = (dot00 * dot12 - dot01 * dot02) / denom
    return (u >= 0) & (v >= 0) & (u + v <= 1)

# "Height" function: sum of two Gaussian bumps in y, centred at 15% and 85% of
# the triangle height (y = 0.45 and y = 2.55), lower in between.
def triangle_density(x, y):
    norm_y = y / 3  # normalize y to 0-1
    return np.exp(-((norm_y - 0.15) ** 2) * 10) + np.exp(-((norm_y - 0.85) ** 2) * 10)

# Apply mask and height function on the whole grid at once (0 outside the triangle)
Z = np.where(point_in_triangle(X, Y, ball_point, left, right), triangle_density(X, Y), 0.0)

# Plot the triangle with the density profile
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z, cmap='viridis', edgecolor='none', alpha=0.9)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Weight (Density)')
ax.set_title('3D Weighted Triangle – High Density at Tip and Base')

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

# --- Triangle Definition ---
# Apex at the ball position; named `ball_point` so it does not overwrite the
# `ball` tracking DataFrame used by the earlier cells.
ball_point = np.array([0, 0])
left = np.array([-1.5, 3])
right = np.array([1.5, 3])

# Grid for evaluation (slightly larger than the triangle's bounding box)
x = np.linspace(-1.6, 1.6, 300)
y = np.linspace(-0.1, 3.1, 300)
X, Y = np.meshgrid(x, y)

# Barycentric check
def point_in_triangle(px, py, a, b, c):
    """
    Barycentric test of whether point(s) (px, py) lie inside triangle a-b-c.

    px and py may be scalars or equally shaped arrays; a, b, c are length-2 arrays.
    Returns a boolean (array) that is True on and inside the triangle's edges and
    all False for a degenerate (zero-area) triangle.
    """
    v0, v1 = c - a, b - a
    # Components of the vector from a to the query point(s); written out so the
    # same arithmetic broadcasts over whole grids.
    v2x, v2y = px - a[0], py - a[1]
    dot00, dot01 = np.dot(v0, v0), np.dot(v0, v1)
    dot11 = np.dot(v1, v1)
    dot02 = v0[0] * v2x + v0[1] * v2y
    dot12 = v1[0] * v2x + v1[1] * v2y
    denom = dot00 * dot11 - dot01 * dot01
    if denom == 0:
        return np.zeros(np.shape(px), dtype=bool)
    u = (dot11 * dot02 - dot01 * dot12) / denom
    v = (dot00 * dot12 - dot01 * dot02) / denom
    return (u >= 0) & (v >= 0) & (u + v <= 1)

# Triangle weight function: two Gaussian bumps in y, centred at 15% and 85% of the height
def triangle_density(x, y):
    norm_y = y / 3
    return np.exp(-((norm_y - 0.15) ** 2) * 10) + np.exp(-((norm_y - 0.85) ** 2) * 10)

# Mask and apply density on the whole grid at once (0 outside the triangle)
Z = np.where(point_in_triangle(X, Y, ball_point, left, right), triangle_density(X, Y), 0.0)

# --- Plotting ---
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')

# Surface
surf = ax.plot_surface(X, Y, Z, cmap='viridis', edgecolor='none', alpha=0.95, antialiased=True)

# Highlight triangle edges (drawn in the z = 0 plane)
for start, end in [(ball_point, left), (left, right), (right, ball_point)]:
    ax.plot([start[0], end[0]], [start[1], end[1]], [0, 0], color='black', linewidth=2)

# Labels and view
ax.set_xlabel('X', labelpad=10, fontsize=12)
ax.set_ylabel('Y', labelpad=10, fontsize=12)
ax.set_zlabel('Weight (Density)', labelpad=10, fontsize=12)
ax.set_title('Weighted Triangle Model\nHigh Density at Tip and Base', fontsize=14, pad=20)
ax.view_init(elev=35, azim=-60)
ax.grid(False)

# Make the axis panes transparent for a cleaner look
ax.xaxis.pane.fill = False
ax.yaxis.pane.fill = False
ax.zaxis.pane.fill = False

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Triangle points
# Apex at the ball position; named `ball_point` so it does not overwrite the
# `ball` tracking DataFrame used by the earlier cells.
ball_point = np.array([0, 0])
left = np.array([-1.5, 3])
right = np.array([1.5, 3])

# Grid setup
x = np.linspace(-1.6, 1.6, 300)
y = np.linspace(-0.1, 3.1, 300)
X, Y = np.meshgrid(x, y)

def point_in_triangle(px, py, a, b, c):
    """
    Barycentric test of whether point(s) (px, py) lie inside triangle a-b-c.

    px and py may be scalars or equally shaped arrays; a, b, c are length-2 arrays.
    Returns a boolean (array) that is True on and inside the triangle's edges and
    all False for a degenerate (zero-area) triangle.
    """
    v0, v1 = c - a, b - a
    # Components of the vector from a to the query point(s); written out so the
    # same arithmetic broadcasts over whole grids.
    v2x, v2y = px - a[0], py - a[1]
    dot00, dot01 = np.dot(v0, v0), np.dot(v0, v1)
    dot11 = np.dot(v1, v1)
    dot02 = v0[0] * v2x + v0[1] * v2y
    dot12 = v1[0] * v2x + v1[1] * v2y
    denom = dot00 * dot11 - dot01 * dot01
    if denom == 0:
        return np.zeros(np.shape(px), dtype=bool)
    u = (dot11 * dot02 - dot01 * dot12) / denom
    v = (dot00 * dot12 - dot01 * dot02) / denom
    return (u >= 0) & (v >= 0) & (u + v <= 1)

def triangle_density(x, y):
    """Weight profile: two Gaussian bumps in y, centred at 15% and 85% of the height 3."""
    norm_y = y / 3
    return np.exp(-((norm_y - 0.15) ** 2) * 10) + np.exp(-((norm_y - 0.85) ** 2) * 10)

# Apply mask to triangle only; NaN outside so the surface is not drawn there
Z = np.where(point_in_triangle(X, Y, ball_point, left, right), triangle_density(X, Y), np.nan)

# --- Plot only the surface ---
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')

# Surface only, no edge lines or grid
surf = ax.plot_surface(X, Y, Z, cmap='viridis', edgecolor='none', alpha=1, antialiased=True)

# Clean up axes
ax.set_xlabel('X', labelpad=10, fontsize=12)
ax.set_ylabel('Y', labelpad=10, fontsize=12)
ax.set_zlabel('Weight', labelpad=10, fontsize=12)
ax.set_title('Weighted Triangle Surface – Tip & Base Emphasis', fontsize=14, pad=20)
ax.view_init(elev=40, azim=-60)
ax.set_box_aspect([2, 2, 1])  # Box proportions x:y:z = 2:2:1 (z axis drawn at half length)

# Optional: hide the panes to make it float
ax.xaxis.pane.fill = False
ax.yaxis.pane.fill = False
ax.zaxis.pane.fill = False
ax.grid(False)

plt.tight_layout()
plt.show()


In [None]:
import pandas as pd

def count_pass_event_matches(df1, df2, tolerance=15):
    """
    Compare ground-truth pass events (df1) with predicted pass events (df2).

    Two events match when:
      - |df1["Start Time [s]"] - df2["Start Time [s]"]| <= tolerance
      - |df1["End Time [s]"] - df2["End Time [s]"]| <= tolerance
      - df1["From"] == df2["From"] and df1["To"] == df2["To"]

    Recall is based on the ground-truth events that have at least one matching
    prediction. Precision is based on the predictions that have at least one matching
    ground-truth event, so it can never exceed 100% even when several ground-truth
    events fall within the tolerance of the same prediction.

    Parameters:
      df1 (pd.DataFrame): Ground-truth pass events.
      df2 (pd.DataFrame): Predicted pass events.
      tolerance (float): Tolerance (in seconds) allowed on the start and end times.

    Returns:
      dict: A dictionary with keys:
            - "match_count": number of events in df1 with a matching event in df2
            - "matched_predictions": number of events in df2 with a matching event in df1
            - "total_df1": total events in df1
            - "total_df2": total events in df2
            - "recall": (match_count / total_df1) * 100
            - "precision": (matched_predictions / total_df2) * 100
            - "f1_score": the F1 score based on recall and precision (in percent).
    """
    def _records(df):
        # (start, end, from, to) tuples; an empty frame may lack the columns entirely.
        if len(df) == 0:
            return []
        return list(zip(df["Start Time [s]"], df["End Time [s]"], df["From"], df["To"]))

    gt_events = _records(df1)
    pred_events = _records(df2)

    matches = 0
    matched_predictions = set()  # positions of predictions matched by any ground-truth event
    for gt_start, gt_end, gt_from, gt_to in gt_events:
        found = False
        for pos, (p_start, p_end, p_from, p_to) in enumerate(pred_events):
            if (abs(gt_start - p_start) <= tolerance and
                    abs(gt_end - p_end) <= tolerance and
                    gt_from == p_from and
                    gt_to == p_to):
                found = True
                matched_predictions.add(pos)
        if found:
            matches += 1

    total_df1 = len(df1)
    total_df2 = len(df2)
    recall = (matches / total_df1) * 100 if total_df1 > 0 else 0
    precision = (len(matched_predictions) / total_df2) * 100 if total_df2 > 0 else 0
    f1_score = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0

    summary = {
        "match_count": matches,
        "matched_predictions": len(matched_predictions),
        "total_df1": total_df1,
        "total_df2": total_df2,
        "recall": recall,
        "precision": precision,
        "f1_score": f1_score
    }

    print("Evaluation Metrics:")
    print(f"Total events in labelled data (ground truth): {total_df1}")
    print(f"Total events in predicted (predictions): {total_df2}")
    print(f"Match Count: {matches}")
    print(f"Matched Predictions: {len(matched_predictions)}")
    print(f"Recall: {recall:.2f}%")
    print(f"Precision: {precision:.2f}%")
    print(f"F1 Score: {f1_score:.2f}%")

    return summary
# Evaluate the predicted passes against the labelled passes with a 6-second tolerance:
metrics = count_pass_event_matches(event_data_passes_subset, df_filtered, tolerance=6)

# Precision measures how many of the items your model identified as positive (or relevant) are actually correct.
# Recall measures how many of the actual positive (or relevant) items your model was able to capture.


### OPEN CHOICES: missing timestamps when evaluating -- threshold = 0?
### Tolerance of 8 seconds?
### Maximum duration of a pass? 10 seconds?


In [None]:
import pandas as pd

def count_pass_event_matches(df1, df2, tolerance=15):
    """
    Compare ground-truth pass events (df1) with predicted pass events (df2).

    Two events match when:
      - |df1["Start Time [s]"] - df2["Start Time [s]"]| <= tolerance
      - |df1["End Time [s]"] - df2["End Time [s]"]| <= tolerance
      - df1["From"] == df2["From"] and df1["To"] == df2["To"]

    Recall is based on the ground-truth events that have at least one matching
    prediction. Precision is based on the predictions that have at least one matching
    ground-truth event, so it can never exceed 100% even when several ground-truth
    events fall within the tolerance of the same prediction.

    Parameters:
      df1 (pd.DataFrame): Ground-truth pass events.
      df2 (pd.DataFrame): Predicted pass events.
      tolerance (float): Tolerance (in seconds) allowed on the start and end times.

    Returns:
      dict: A dictionary with keys:
            - "match_count": number of events in df1 with a matching event in df2
            - "matched_predictions": number of events in df2 with a matching event in df1
            - "total_df1": total events in df1
            - "total_df2": total events in df2
            - "recall": (match_count / total_df1) * 100
            - "precision": (matched_predictions / total_df2) * 100
            - "f1_score": the F1 score based on recall and precision (in percent).
    """
    def _records(df):
        # (start, end, from, to) tuples; an empty frame may lack the columns entirely.
        if len(df) == 0:
            return []
        return list(zip(df["Start Time [s]"], df["End Time [s]"], df["From"], df["To"]))

    gt_events = _records(df1)
    pred_events = _records(df2)

    matches = 0
    matched_predictions = set()  # positions of predictions matched by any ground-truth event
    for gt_start, gt_end, gt_from, gt_to in gt_events:
        found = False
        for pos, (p_start, p_end, p_from, p_to) in enumerate(pred_events):
            if (abs(gt_start - p_start) <= tolerance and
                    abs(gt_end - p_end) <= tolerance and
                    gt_from == p_from and
                    gt_to == p_to):
                found = True
                matched_predictions.add(pos)
        if found:
            matches += 1

    total_df1 = len(df1)
    total_df2 = len(df2)
    recall = (matches / total_df1) * 100 if total_df1 > 0 else 0
    precision = (len(matched_predictions) / total_df2) * 100 if total_df2 > 0 else 0
    f1_score = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0

    summary = {
        "match_count": matches,
        "matched_predictions": len(matched_predictions),
        "total_df1": total_df1,
        "total_df2": total_df2,
        "recall": recall,
        "precision": precision,
        "f1_score": f1_score
    }

    print("Evaluation Metrics:")
    print(f"Total events in labelled data (ground truth): {total_df1}")
    print(f"Total events in predicted (predictions): {total_df2}")
    print(f"Match Count: {matches}")
    print(f"Matched Predictions: {len(matched_predictions)}")
    print(f"Recall: {recall:.2f}%")
    print(f"Precision: {precision:.2f}%")
    print(f"F1 Score: {f1_score:.2f}%")

    return summary
# Evaluate the predicted passes against the labelled passes with a 6-second tolerance
# (this cell repeats the previous evaluation cell):
metrics = count_pass_event_matches(event_data_passes_subset, df_filtered, tolerance=6)

# Precision measures how many of the items your model identified as positive (or relevant) are actually correct.
# Recall measures how many of the actual positive (or relevant) items your model was able to capture.


### OPEN CHOICES: missing timestamps when evaluating -- threshold = 0?
### Tolerance of 8 seconds?
### Maximum duration of a pass? 10 seconds?


In [None]:
def event_matches(pred_event, gt_event, tolerance):
    """
    Returns True if the predicted event matches the ground truth event
    within the given tolerance for start and end times, and if the From and To values match exactly.
    """
    return (abs(pred_event["Start Time [s]"] - gt_event["Start Time [s]"]) <= tolerance and
            abs(pred_event["End Time [s]"] - gt_event["End Time [s]"]) <= tolerance and
            pred_event["From"] == gt_event["From"] and
            pred_event["To"] == gt_event["To"])

def evaluate_predictions_with_uncertainty(pred_df, gt_df, tolerance):
    """
    For each predicted event in pred_df, determine if it matches any ground truth event in gt_df.

    Returns:
      - confusion: a dictionary with TP, FP, and FN counts.
      - correct_uncertainties: list of uncertainty values for predicted events that are true positives.
      - incorrect_uncertainties: list of uncertainty values for predicted events that are false positives.

    A predicted event is a true positive (TP) if at least one ground truth event matches
    its Start and End times within the given tolerance and has the same From and To values;
    otherwise it is a false positive (FP).
    Ground truth events not matched by any prediction are counted as FN. Every ground
    truth event that a prediction matches is recorded (not only the first one), so a
    ground truth event within tolerance of a prediction is never reported as missed.
    """
    TP = 0
    FP = 0
    correct_uncertainties = []
    incorrect_uncertainties = []

    # Materialise the ground-truth rows once; positions (not index labels) identify
    # them, so duplicate index labels cannot merge two events.
    gt_events = [gt for _, gt in gt_df.iterrows()]
    matched_gt = set()  # Positions of ground truth events matched by any prediction

    for _, pred in pred_df.iterrows():
        hits = [pos for pos, gt in enumerate(gt_events) if event_matches(pred, gt, tolerance)]
        if hits:
            TP += 1
            matched_gt.update(hits)
            correct_uncertainties.append(pred["uncertainty"])
        else:
            FP += 1
            incorrect_uncertainties.append(pred["uncertainty"])

    FN = len(gt_events) - len(matched_gt)

    confusion = {"TP": TP, "FP": FP, "FN": FN}
    return confusion, correct_uncertainties, incorrect_uncertainties

# Evaluate the duration-filtered predictions (df_filtered) against the labelled passes.
# Tolerance (in seconds) for matching event start and end times.
# NOTE(review): the count_pass_event_matches calls in this notebook use tolerance=6; this cell uses 7.
tolerance = 7

# Evaluate predictions and get confusion metrics along with uncertainty values.
confusion, correct_unc, incorrect_unc = evaluate_predictions_with_uncertainty( df_filtered,event_data_passes_subset, tolerance)

print("Confusion Metrics:")
print(confusion)

# Plot the distribution of uncertainty values for correct vs. incorrect predictions.
plt.figure(figsize=(8,6))
plt.boxplot([correct_unc, incorrect_unc], labels=["Correct Predictions", "Incorrect Predictions"])
plt.ylabel("Uncertainty")
plt.title("Uncertainty Distribution: Correct vs. Incorrect Predictions")
plt.show()


In [None]:
import pandas as pd
import statsmodels.api as sm

# Logistic regression of prediction error on the uncertainty score.
# Create a DataFrame where 'error' is 0 for correct and 1 for error:
df_correct = pd.DataFrame({"uncertainty": correct_unc, "error": 0})
df_incorrect = pd.DataFrame({"uncertainty": incorrect_unc, "error": 1})
reg_df = pd.concat([df_correct, df_incorrect], ignore_index=True)

# Add an intercept column (sm.Logit does not add one itself):
reg_df["intercept"] = 1.0
# NOTE(review): NaN uncertainty values are not removed before fitting -- confirm none are present.
logit_model = sm.Logit(reg_df["error"], reg_df[["intercept", "uncertainty"]])
result = logit_model.fit(disp=False)  # disp=False to suppress fitting output

print(result.summary())

# Extract the p-value for uncertainty:
p_value_uncertainty = result.pvalues["uncertainty"]
print(f"\nP-value for uncertainty coefficient: {p_value_uncertainty:.4f}")
