# Chapter 5: Clustering & Pattern Recognition
## Tennis Analysis - Ball Hit Detection and Player Position Clustering

This notebook demonstrates the concepts from ML4QS Chapter 5 applied to tennis analysis:
- Ball hit detection algorithm: Pattern recognition in temporal sequences
- Player selection/filtering: Classification based on court position
- Shot frame detection: Event clustering in time series
- Distance measurements: Similarity metrics for player/ball positioning

In [None]:
import sys
sys.path.append('../tennis_analysis-main')

import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score, silhouette_samples
from sklearn.decomposition import PCA
from scipy.spatial.distance import cdist
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
import warnings
warnings.filterwarnings('ignore')

## 1. Load Data and Prepare for Clustering

Load ball and player detection data for clustering analysis.

In [None]:
# Load detection data
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load tracker stubs that come from a trusted source.
with open('../tennis_analysis-main/tracker_stubs/ball_detections.pkl', 'rb') as f:
    ball_detections = pickle.load(f)

with open('../tennis_analysis-main/tracker_stubs/player_detections.pkl', 'rb') as f:
    player_detections = pickle.load(f)

# Process ball data
# Every frame entry is queried for key 1; frames lacking that key contribute
# an empty row, which pandas pads with NaN.
ball_positions = [x.get(1,[]) for x in ball_detections]
df_ball = pd.DataFrame(ball_positions, columns=['x1','y1','x2','y2'])

# Record which frames carried a real detection BEFORE the gaps are filled;
# measuring this after interpolation would report the filled data instead.
ball_detected = df_ball[['x1', 'y1', 'x2', 'y2']].notna().all(axis=1)

# Fill missing frames: linear interpolation, then back-fill any leading gap
df_ball = df_ball.interpolate().bfill()
df_ball['center_x'] = (df_ball['x1'] + df_ball['x2']) / 2
df_ball['center_y'] = (df_ball['y1'] + df_ball['y2']) / 2

print(f"Ball data shape: {df_ball.shape}")
print(f"Player data frames: {len(player_detections)}")
print(f"Ball detection rate: {ball_detected.mean():.2%}")

## 2. Distance Metrics Implementation

Implement various distance metrics similar to ML4QS Chapter 5.

In [None]:
class TennisDistanceMetrics:
    """
    Collection of distance measures used throughout the tennis analysis.
    Modelled on the DistanceMetrics helper from ML4QS Chapter 5.
    """

    @staticmethod
    def euclidean_distance(point1, point2):
        """Straight-line (L2) distance between two coordinate sequences."""
        offset = np.array(point1) - np.array(point2)
        return np.sqrt(np.sum(offset**2))

    @staticmethod
    def manhattan_distance(point1, point2):
        """City-block (L1) distance between two coordinate sequences."""
        offset = np.array(point1) - np.array(point2)
        return np.sum(np.abs(offset))

    @staticmethod
    def temporal_distance(trajectory1, trajectory2, max_lag=5):
        """
        Smallest mean point-wise Euclidean distance between two trajectories
        over all relative shifts in [-max_lag, max_lag].

        Returns float('inf') when no shift leaves both trajectories non-empty.
        """
        best = float('inf')

        for shift in range(-max_lag, max_lag + 1):
            if shift < 0:
                # Negative shift: drop the head of the second trajectory
                second = trajectory2[-shift:]
                first = trajectory1[:len(second)]
            else:
                # Non-negative shift: drop the head of the first trajectory
                first = trajectory1[shift:]
                second = trajectory2[:len(first)]

            if len(first) == 0 or len(second) == 0:
                continue

            # zip() stops at the shorter of the two aligned pieces
            pairwise = [TennisDistanceMetrics.euclidean_distance(a, b)
                        for a, b in zip(first, second)]
            best = min(best, np.mean(pairwise))

        return best

    @staticmethod
    def court_zone_distance(zone1, zone2):
        """
        Euclidean distance between two named court zones on a 3x3 grid.

        Zone names are '<depth>_<side>' with depth in back/middle/front and
        side in left/center/right. Returns 0 if either name is unknown.
        """
        depths = ('back', 'middle', 'front')
        sides = ('left', 'center', 'right')
        zone_map = {
            f'{depth}_{side}': (row, col)
            for row, depth in enumerate(depths)
            for col, side in enumerate(sides)
        }

        if zone1 not in zone_map or zone2 not in zone_map:
            return 0

        return TennisDistanceMetrics.euclidean_distance(zone_map[zone1], zone_map[zone2])

# Smoke-test the metrics on a pair of sample points and two court zones
dm = TennisDistanceMetrics()
point1 = [100, 200]
point2 = [150, 250]

euclidean_demo = dm.euclidean_distance(point1, point2)
manhattan_demo = dm.manhattan_distance(point1, point2)
zone_demo = dm.court_zone_distance('back_left', 'front_right')

print(f"Euclidean distance: {euclidean_demo:.2f}")
print(f"Manhattan distance: {manhattan_demo:.2f}")
print(f"Zone distance: {zone_demo:.2f}")

## 3. Ball Hit Detection Using Pattern Recognition

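Implement the temporal pattern recognition algorithm for ball hits: a hit is flagged where the smoothed vertical motion of the ball reverses direction and the new direction persists over the following frames.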

In [None]:
# Calculate features for pattern recognition
df_ball['mid_y'] = df_ball['center_y']
# Smooth the vertical position with a trailing 5-frame mean before differencing
df_ball['mid_y_rolling_mean'] = df_ball['mid_y'].rolling(window=5, min_periods=1, center=False).mean()
df_ball['delta_y'] = df_ball['mid_y_rolling_mean'].diff()
df_ball['ball_hit'] = 0

# Ball hit detection algorithm (pattern recognition)
# A frame is a hit when the sign of delta_y flips between it and the next
# frame AND the new sign is seen in at least `minimum_change_frames_for_hit`
# of the following `lookahead_frames` frames.
minimum_change_frames_for_hit = 25
lookahead_frames = int(minimum_change_frames_for_hit * 1.2)

# Pull the column out once: plain array indexing instead of repeated
# per-element .iloc lookups inside the loops.
delta_y = df_ball['delta_y'].to_numpy()
hit_positions = []

# The upper bound keeps the whole look-ahead window inside the data, so no
# per-frame bounds check is needed.
for i in range(1, len(df_ball) - lookahead_frames):
    current, following = delta_y[i], delta_y[i + 1]
    lookahead = delta_y[i + 1:i + lookahead_frames + 1]

    # NaN deltas compare False on both sides and therefore never trigger a hit
    if current > 0 and following < 0:
        change_count = int((lookahead < 0).sum())
    elif current < 0 and following > 0:
        change_count = int((lookahead > 0).sum())
    else:
        continue

    if change_count > minimum_change_frames_for_hit - 1:
        hit_positions.append(i)

# Translate positions to index labels before the label-based assignment
df_ball.loc[df_ball.index[hit_positions], 'ball_hit'] = 1

ball_hit_frames = df_ball[df_ball['ball_hit'] == 1].index.tolist()
print(f"Detected {len(ball_hit_frames)} ball hits using pattern recognition")
print(f"Ball hit frames: {ball_hit_frames}")

## 4. Clustering Ball Trajectory Patterns

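Build sliding-window trajectory features from the ball positions. K-means clustering is applied to these features in the next section to identify different ball movement patterns.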

In [None]:
# Build sliding-window descriptors of the ball trajectory
window_size = 10
trajectory_features = []
frame_indices = []

# Each segment covers window_size frames before the centre frame and
# window_size frames from the centre frame onwards
for center_frame in range(window_size, len(df_ball) - window_size):
    segment = df_ball.iloc[center_frame - window_size:center_frame + window_size]
    xs, ys = segment['center_x'], segment['center_y']

    # Skip any window that still contains a missing position
    if xs.isna().any() or ys.isna().any():
        continue

    # Frame-to-frame displacement inside the window
    step_x, step_y = xs.diff(), ys.diff()

    trajectory_features.append([
        # Position statistics
        xs.mean(), ys.mean(), xs.std(), ys.std(),
        # Movement statistics
        step_x.mean(), step_y.mean(), step_x.std(), step_y.std(),
        # Range
        xs.max() - xs.min(), ys.max() - ys.min(),
    ])
    frame_indices.append(center_frame)

# Assemble the per-window features into a DataFrame keyed by centre frame
trajectory_columns = [
    'x_mean', 'y_mean', 'x_std', 'y_std',
    'dx_mean', 'dy_mean', 'dx_std', 'dy_std',
    'x_range', 'y_range'
]
df_trajectories = pd.DataFrame(trajectory_features, columns=trajectory_columns)
df_trajectories['frame'] = frame_indices

print(f"Created {len(df_trajectories)} trajectory segments for clustering")
print("\nTrajectory features:")
print(df_trajectories.describe())

## 5. K-Means Clustering Analysis

Apply K-means clustering similar to ML4QS Chapter 5.

In [None]:
# Normalize features for clustering
feature_cols = ['x_mean', 'y_mean', 'x_std', 'y_std', 'dx_mean', 'dy_mean', 'dx_std', 'dy_std', 'x_range', 'y_range']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_trajectories[feature_cols])

# Determine optimal number of clusters using elbow method and silhouette score
# The silhouette score is only defined for 2 <= k <= n_samples - 1, so cap
# the sweep for short clips instead of failing part-way through the loop.
max_k = min(9, len(X_scaled) - 1)
k_range = range(2, max_k + 1)
if len(k_range) == 0:
    raise ValueError(
        f"Need at least 3 trajectory segments to select k, got {len(X_scaled)}"
    )

inertias = []
silhouette_scores = []

for k in k_range:
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = kmeans.fit_predict(X_scaled)

    inertias.append(kmeans.inertia_)
    silhouette_scores.append(silhouette_score(X_scaled, labels))

# Plot elbow curve and silhouette scores
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

ax1.plot(k_range, inertias, 'bo-')
ax1.set_xlabel('Number of Clusters (k)')
ax1.set_ylabel('Inertia')
ax1.set_title('Elbow Method for Optimal k')
ax1.grid(True)

ax2.plot(k_range, silhouette_scores, 'ro-')
ax2.set_xlabel('Number of Clusters (k)')
ax2.set_ylabel('Silhouette Score')
ax2.set_title('Silhouette Score vs Number of Clusters')
ax2.grid(True)

plt.tight_layout()
plt.show()

# Choose optimal k: the k with the highest average silhouette score
optimal_k = k_range[np.argmax(silhouette_scores)]
print(f"Optimal number of clusters: {optimal_k}")
print(f"Best silhouette score: {max(silhouette_scores):.3f}")

## 6. Apply K-Means with Optimal Clusters

Perform final K-means clustering and analyze results.

In [None]:
# Apply K-means with optimal k
kmeans_final = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)
df_trajectories['cluster'] = kmeans_final.fit_predict(X_scaled)
trajectory_labels = df_trajectories['cluster'].to_numpy()

# Calculate cluster statistics (per-cluster feature means, unscaled)
cluster_stats = df_trajectories.groupby('cluster')[feature_cols].mean()
print("Cluster centers (original scale):")
print(cluster_stats)

# Cluster size distribution
cluster_counts = df_trajectories['cluster'].value_counts().sort_index()
print(f"\nCluster size distribution:")
for cluster, count in cluster_counts.items():
    percentage = (count / len(df_trajectories)) * 100
    print(f"  Cluster {cluster}: {count} segments ({percentage:.1f}%)")

# Silhouette analysis
silhouette_avg = silhouette_score(X_scaled, trajectory_labels)
sample_silhouette_values = silhouette_samples(X_scaled, trajectory_labels)

print(f"\nAverage silhouette score: {silhouette_avg:.3f}")

# One colour per cluster, shared by BOTH panels so a cluster looks the same
# in the silhouette plot and in the PCA scatter.
cluster_colors = [plt.cm.nipy_spectral(float(i) / optimal_k) for i in range(optimal_k)]

# Plot silhouette analysis
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

y_lower = 10
for i in range(optimal_k):
    # Sorted silhouette values of the members of cluster i
    cluster_silhouette_values = np.sort(sample_silhouette_values[trajectory_labels == i])

    size_cluster_i = cluster_silhouette_values.shape[0]
    y_upper = y_lower + size_cluster_i

    ax1.fill_betweenx(np.arange(y_lower, y_upper), 0, cluster_silhouette_values,
                     facecolor=cluster_colors[i], edgecolor=cluster_colors[i], alpha=0.7)

    ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
    y_lower = y_upper + 10  # leave a gap between consecutive cluster bands

ax1.set_xlabel('Silhouette coefficient values')
ax1.set_ylabel('Cluster label')
ax1.set_title('Silhouette Plot for Trajectory Clusters')
ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

# PCA visualization (2-D projection of the scaled features)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

for i in range(optimal_k):
    cluster_points = X_pca[trajectory_labels == i]
    ax2.scatter(cluster_points[:, 0], cluster_points[:, 1],
               c=[cluster_colors[i]], label=f'Cluster {i}', alpha=0.7)

ax2.set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%} variance)')
ax2.set_ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.1%} variance)')
ax2.set_title('Trajectory Clusters in PCA Space')
ax2.legend()
ax2.grid(True)

plt.tight_layout()
plt.show()

## 7. DBSCAN Clustering for Event Detection

Apply DBSCAN to detect event clusters in temporal data.

In [None]:
# Prepare event data (ball hits and surrounding context)
context_frames = 10  # frames of context taken on each side of a hit
event_feature_cols = ['hit_x', 'hit_y', 'pre_dx', 'pre_dy', 'post_dx', 'post_dy', 'mag_dx', 'mag_dy']
event_features = []
event_frames = []


def _mean_step(series):
    """Mean frame-to-frame change of *series*; 0 when fewer than two samples exist."""
    return series.diff().mean() if len(series) > 1 else 0


for hit_frame in ball_hit_frames:
    # Extract context around ball hit, clipped to the available frames
    start_idx = max(0, hit_frame - context_frames)
    end_idx = min(len(df_ball), hit_frame + context_frames)

    context = df_ball.iloc[start_idx:end_idx]

    if len(context) >= context_frames:  # Ensure minimum context length
        # Split at the hit itself. A fixed split at row 10 is wrong whenever
        # the window was clipped at the start of the clip.
        split = hit_frame - start_idx
        pre_hit = context.iloc[:split]
        post_hit = context.iloc[split:]
        hit_row = df_ball.iloc[hit_frame]

        features = [
            # Hit location
            hit_row['center_x'],
            hit_row['center_y'],

            # Pre-hit movement
            _mean_step(pre_hit['center_x']),
            _mean_step(pre_hit['center_y']),

            # Post-hit movement
            _mean_step(post_hit['center_x']),
            _mean_step(post_hit['center_y']),

            # Movement magnitude
            context['center_x'].diff().abs().mean(),
            context['center_y'].diff().abs().mean(),
        ]

        event_features.append(features)
        event_frames.append(hit_frame)

if len(event_features) > 0:
    df_events = pd.DataFrame(event_features, columns=event_feature_cols)
    df_events['frame'] = event_frames

    # Normalize event features
    event_scaler = StandardScaler()
    X_events_scaled = event_scaler.fit_transform(df_events[event_feature_cols])

    # Apply DBSCAN
    dbscan = DBSCAN(eps=0.5, min_samples=2)
    df_events['dbscan_cluster'] = dbscan.fit_predict(X_events_scaled)

    # Analyze DBSCAN results
    # `-1 in series` would test the Series INDEX, not its values, so work on
    # the plain label list; -1 is DBSCAN's noise label and is not a cluster.
    event_labels = df_events['dbscan_cluster'].tolist()
    distinct_labels = set(event_labels)
    n_clusters = len(distinct_labels) - (1 if -1 in distinct_labels else 0)
    n_noise = event_labels.count(-1)

    print(f"DBSCAN Results for Ball Hit Events:")
    print(f"  Number of clusters: {n_clusters}")
    print(f"  Number of noise points: {n_noise}")
    print(f"  Total events: {len(df_events)}")

    if n_clusters > 0:
        print("\nCluster distribution:")
        for cluster in sorted(distinct_labels):
            count = event_labels.count(cluster)
            if cluster == -1:
                print(f"  Noise: {count} events")
            else:
                print(f"  Cluster {cluster}: {count} events")
else:
    print("No ball hit events found for DBSCAN analysis")

## 8. Hierarchical Clustering for Player Positions

Apply hierarchical clustering to analyze player positioning patterns.

In [None]:
# Extract player positions (simplified analysis)
player_positions = []
for frame_idx, frame_data in enumerate(player_detections[:100]):  # Limit for demo
    if not frame_data:
        continue

    for player_id, detections in frame_data.items():
        if not detections:  # Player not detected in this frame
            continue

        # NOTE(review): the ball stubs store a flat [x1, y1, x2, y2] per id;
        # the player stubs presumably do too - confirm against the tracker.
        # Accept both a flat bbox and a list of bboxes (first one is used).
        first_item = detections[0]
        bbox = first_item if isinstance(first_item, (list, tuple, np.ndarray)) else detections

        center_x = (bbox[0] + bbox[2]) / 2
        center_y = (bbox[1] + bbox[3]) / 2

        player_positions.append({
            'frame': frame_idx,
            'player_id': player_id,
            'center_x': center_x,
            'center_y': center_y,
            'bbox_width': bbox[2] - bbox[0],
            'bbox_height': bbox[3] - bbox[1]
        })

if len(player_positions) > 10:
    df_players = pd.DataFrame(player_positions)

    # Aggregate player positions by frame windows
    # Player-specific names are used below so this cell does not overwrite
    # window_size / feature_cols / n_clusters set by earlier cells.
    player_window_size = 10
    player_segments = []
    last_frame = int(df_players['frame'].max())

    # Windows overlap by half their length
    for start_frame in range(0, last_frame - player_window_size, player_window_size // 2):
        window_data = df_players[
            (df_players['frame'] >= start_frame) &
            (df_players['frame'] < start_frame + player_window_size)
        ]

        if len(window_data) > 0:
            # Aggregate by player
            for player_id in window_data['player_id'].unique():
                player_window = window_data[window_data['player_id'] == player_id]

                if len(player_window) >= 3:  # Minimum appearances in window
                    # Per-row displacement between consecutive appearances
                    step_length = np.sqrt(player_window['center_x'].diff().pow(2) +
                                          player_window['center_y'].diff().pow(2))
                    segment = {
                        'start_frame': start_frame,
                        'player_id': player_id,
                        'mean_x': player_window['center_x'].mean(),
                        'mean_y': player_window['center_y'].mean(),
                        'std_x': player_window['center_x'].std(),
                        'std_y': player_window['center_y'].std(),
                        'mobility': step_length.mean()
                    }
                    player_segments.append(segment)

    if len(player_segments) > 5:
        df_player_segments = pd.DataFrame(player_segments)

        # Prepare features for hierarchical clustering
        player_feature_cols = ['mean_x', 'mean_y', 'std_x', 'std_y', 'mobility']
        X_player = df_player_segments[player_feature_cols].fillna(0)
        X_player_scaled = StandardScaler().fit_transform(X_player)

        # Perform hierarchical clustering
        linkage_matrix = linkage(X_player_scaled, method='ward')

        # Plot dendrogram
        plt.figure(figsize=(12, 8))
        dendrogram(linkage_matrix, truncate_mode='level', p=5)
        plt.title('Hierarchical Clustering of Player Position Patterns')
        plt.xlabel('Sample Index or Cluster Size')
        plt.ylabel('Distance')
        plt.show()

        # Extract clusters: cut the tree into at most n_player_clusters groups
        n_player_clusters = 3
        cluster_labels = fcluster(linkage_matrix, n_player_clusters, criterion='maxclust')
        df_player_segments['hc_cluster'] = cluster_labels

        print(f"Hierarchical clustering results for player positions:")
        print(f"Number of player position segments: {len(df_player_segments)}")
        print(f"Cluster distribution:")
        for cluster in sorted(df_player_segments['hc_cluster'].unique()):
            count = (df_player_segments['hc_cluster'] == cluster).sum()
            print(f"  Cluster {cluster}: {count} segments")
    else:
        print("Insufficient player position data for hierarchical clustering")
else:
    print("Insufficient player position data extracted")

## 9. Temporal Event Sequence Clustering

Cluster sequences of events to identify game patterns.

In [None]:
# Create event sequences based on ball hits
def create_event_sequences(ball_hit_frames, sequence_length=5):
    """
    Slice the gaps between consecutive ball hits into overlapping windows.

    Returns a list with one entry per window; each entry holds
    `sequence_length` consecutive inter-hit intervals. The list is empty
    when there are fewer hits than `sequence_length`, or too few intervals
    to fill a single window.
    """
    if len(ball_hit_frames) < sequence_length:
        return []

    # Frame gaps between successive hits
    gaps = np.diff(ball_hit_frames)
    window_count = len(gaps) - sequence_length + 1

    # Slide a window of sequence_length gaps one step at a time
    return [gaps[start:start + sequence_length] for start in range(window_count)]

# Generate event sequences
sequences = create_event_sequences(ball_hit_frames, sequence_length=3)

if len(sequences) > 2:
    interval_cols = [f'interval_{i}' for i in range(len(sequences[0]))]
    df_sequences = pd.DataFrame(sequences, columns=interval_cols)

    # Add sequence statistics
    # Take the raw interval columns once so that the derived columns added
    # below (e.g. mean_interval) never leak into the row-wise statistics.
    sequence_features = df_sequences[interval_cols]
    df_sequences['mean_interval'] = sequence_features.mean(axis=1)
    df_sequences['std_interval'] = sequence_features.std(axis=1)
    df_sequences['rhythm_consistency'] = 1 / (1 + df_sequences['std_interval'])  # Higher = more consistent

    # Normalize for clustering
    sequence_scaler = StandardScaler()
    X_sequences_scaled = sequence_scaler.fit_transform(sequence_features)

    # Apply K-means clustering to event sequences
    # n_init is set explicitly, matching the other KMeans calls in this notebook
    n_seq_clusters = min(3, len(sequences))
    kmeans_sequences = KMeans(n_clusters=n_seq_clusters, random_state=42, n_init=10)
    df_sequences['sequence_cluster'] = kmeans_sequences.fit_predict(X_sequences_scaled)

    print(f"Event Sequence Analysis:")
    print(f"Total sequences: {len(df_sequences)}")
    print(f"Sequence length: {len(sequences[0])} intervals")

    print("\nSequence cluster characteristics:")
    for cluster in sorted(df_sequences['sequence_cluster'].unique()):
        cluster_data = df_sequences[df_sequences['sequence_cluster'] == cluster]
        print(f"  Cluster {cluster}: {len(cluster_data)} sequences")
        print(f"    Mean interval: {cluster_data['mean_interval'].mean():.1f} frames")
        print(f"    Rhythm consistency: {cluster_data['rhythm_consistency'].mean():.3f}")

    # Visualize sequence clusters
    if len(df_sequences) > 1:
        plt.figure(figsize=(12, 6))

        # Plot interval patterns: one line per sequence, coloured by cluster
        plt.subplot(1, 2, 1)
        for cluster in sorted(df_sequences['sequence_cluster'].unique()):
            cluster_data = df_sequences[df_sequences['sequence_cluster'] == cluster]
            plt.plot(cluster_data[interval_cols].T,
                    alpha=0.6, color=plt.cm.tab10(cluster))
        plt.xlabel('Interval Position in Sequence')
        plt.ylabel('Inter-hit Interval (frames)')
        plt.title('Event Sequence Patterns by Cluster')
        plt.grid(True)

        # Plot rhythm consistency
        plt.subplot(1, 2, 2)
        for cluster in sorted(df_sequences['sequence_cluster'].unique()):
            cluster_data = df_sequences[df_sequences['sequence_cluster'] == cluster]
            plt.scatter(cluster_data['mean_interval'], cluster_data['rhythm_consistency'],
                       label=f'Cluster {cluster}', alpha=0.7)
        plt.xlabel('Mean Interval (frames)')
        plt.ylabel('Rhythm Consistency')
        plt.title('Sequence Characteristics')
        plt.legend()
        plt.grid(True)

        plt.tight_layout()
        plt.show()
else:
    print("Insufficient ball hit events for sequence analysis")

## 10. Comprehensive Analysis and Results Summary

Summarize all clustering and pattern recognition results.

In [None]:
# Create comprehensive results summary
results_summary = {
    'Pattern Recognition': {
        'Ball hits detected': len(ball_hit_frames),
        'Hit frames': ball_hit_frames,
        # Share of frames flagged as a hit (not the ball detector's recall)
        'Detection rate': f"{len(ball_hit_frames)/len(df_ball)*100:.2f}% of frames"
    },

    'Trajectory Clustering': {
        'Optimal clusters': optimal_k,
        'Silhouette score': f"{max(silhouette_scores):.3f}",
        'Trajectory segments': len(df_trajectories)
    }
}

print("=" * 60)
print("TENNIS ANALYSIS - CLUSTERING & PATTERN RECOGNITION SUMMARY")
print("=" * 60)

for category, metrics in results_summary.items():
    print(f"\n{category}:")
    for metric, value in metrics.items():
        print(f"  {metric}: {value}")

# Distance metrics comparison
print("\nDistance Metrics Demonstration:")
if len(ball_hit_frames) >= 2:
    frame1, frame2 = ball_hit_frames[0], ball_hit_frames[1]
    pos1 = [df_ball.iloc[frame1]['center_x'], df_ball.iloc[frame1]['center_y']]
    pos2 = [df_ball.iloc[frame2]['center_x'], df_ball.iloc[frame2]['center_y']]

    euclidean_dist = dm.euclidean_distance(pos1, pos2)
    manhattan_dist = dm.manhattan_distance(pos1, pos2)

    print(f"  Distance between first two hits:")
    print(f"    Euclidean: {euclidean_dist:.2f} pixels")
    print(f"    Manhattan: {manhattan_dist:.2f} pixels")

# Cluster interpretation
# The thresholds below are in the pixel units of the ball coordinates.
# NOTE(review): 400/600 presumably assume a ~1080-pixel-tall frame - confirm.
print("\nCluster Interpretation:")
if ('df_trajectories' in locals() and len(df_trajectories) > 0
        and 'cluster' in df_trajectories.columns):  # cluster labels must exist
    for cluster in sorted(df_trajectories['cluster'].unique()):
        cluster_data = df_trajectories[df_trajectories['cluster'] == cluster]
        avg_y = cluster_data['y_mean'].mean()
        avg_movement = cluster_data['dy_mean'].abs().mean()

        if avg_y < 400:  # Upper part of court
            court_area = "Upper court"
        elif avg_y > 600:  # Lower part of court
            court_area = "Lower court"
        else:
            court_area = "Middle court"

        if avg_movement < 5:
            movement_type = "Slow/stationary"
        elif avg_movement < 15:
            movement_type = "Moderate movement"
        else:
            movement_type = "Fast movement"

        print(f"  Cluster {cluster}: {court_area}, {movement_type} ({len(cluster_data)} segments)")

# Nothing in this notebook writes results to disk, so the closing message
# only claims what was actually done.
print("\n" + "=" * 60)
print("Analysis complete - All clustering results summarized and visualized")
print("=" * 60)

## Summary

This notebook demonstrated Chapter 5 concepts:

1. **Distance Metrics**: Implemented various distance functions for tennis data analysis
2. **Pattern Recognition**: Applied temporal pattern detection for ball hit identification
3. **K-Means Clustering**: Clustered ball trajectory patterns with optimal cluster selection
4. **DBSCAN Clustering**: Detected event clusters in temporal sequences
5. **Hierarchical Clustering**: Analyzed player positioning patterns using dendrogram analysis
6. **Temporal Sequence Clustering**: Identified game rhythm patterns in inter-hit intervals
7. **Similarity Metrics**: Applied various distance measures for player/ball positioning
8. **Cluster Validation**: Used silhouette analysis and elbow method for cluster quality assessment

The clustering analysis revealed distinct patterns in tennis ball movement, player positioning, and game rhythm that can be used for advanced sports analytics and performance analysis.