**requirements**

In [1]:
pip install pulp

Collecting pulp
  Downloading PuLP-2.9.0-py3-none-any.whl.metadata (5.4 kB)
Downloading PuLP-2.9.0-py3-none-any.whl (17.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.7/17.7 MB[0m [31m82.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: pulp
Successfully installed pulp-2.9.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.21-py3-none-any.whl.metadata (34 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.9-py3-none-any.whl.metadata (9.3 kB)
Downloading ultralytics-8.3.21-py3-none-any.whl (877 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m877.1/877.1 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading ultralytics_thop-2.0.9-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.21 ultralytics-thop-2.0.9
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install supervision

Collecting supervision
  Downloading supervision-0.24.0-py3-none-any.whl.metadata (14 kB)
Downloading supervision-0.24.0-py3-none-any.whl (158 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.2/158.2 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: supervision
Successfully installed supervision-0.24.0
Note: you may need to restart the kernel to use updated packages.


**camera_movement_estimator**

In [4]:
import cv2
import numpy as np

class CameraMovementEstimator():
    def __init__(self, frame):
        self.minimum_distance = 5

        self.lk_params = dict(
            winSize = (15, 15),
            maxLevel = 2,
            criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)
        )

        first_frame_grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        mask_features = np.zeros_like(first_frame_grayscale)
        mask_features[:, 0:20] = 1
        mask_features[:, 900:1050] = 1

        self.features = dict(
            maxCorners = 100,
            qualityLevel = 0.3,
            minDistance = 3,
            blockSize = 7,
            mask = mask_features
        )

    def add_adjust_positions_to_tracks(self, tracks, camera_movement_per_frame):
        for object, object_tracks in tracks.items():
            for frame_num, track in enumerate(object_tracks):
                for track_id, track_info in track.items():
                    position = track_info['position']
                    camera_movement = camera_movement_per_frame[frame_num]
                    position_adjusted = (position[0] - camera_movement[0], position[1] - camera_movement[1])
                    tracks[object][frame_num][track_id]['position_adjusted'] = position_adjusted

    def get_camera_movement(self, frames):
        camera_movement = [[0, 0]] * len(frames)

        old_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
        old_features = cv2.goodFeaturesToTrack(old_gray, **self.features)

        if old_features is None or len(old_features) == 0:
            print("No features detected in the first frame.")
            return camera_movement

        for frame_num in range(1, len(frames)):
            frame_gray = cv2.cvtColor(frames[frame_num], cv2.COLOR_BGR2GRAY)
            
            # Ensure old_features has valid points
            if old_features is None or len(old_features) == 0:
                print(f"No features to track at frame {frame_num}")
                old_features = cv2.goodFeaturesToTrack(frame_gray, **self.features)
                if old_features is None:
                    print("Still no features to track, skipping frame")
                    continue

            # Compute optical flow
            new_features, status, _ = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, old_features, None, **self.lk_params)

            if new_features is None:
                print(f"No new features found at frame {frame_num}")
                continue

            max_distance = 0
            camera_movement_x, camera_movement_y = 0, 0

            for i, (new, old) in enumerate(zip(new_features, old_features)):
                if status[i]:  # Only process points with valid status
                    new_features_point = new.ravel()
                    old_features_point = old.ravel()

                    distance = measure_distance(new_features_point, old_features_point)
                    if distance > max_distance:
                        max_distance = distance
                        camera_movement_x, camera_movement_y = measure_xy_distance(old_features_point, new_features_point)

            if max_distance > self.minimum_distance:
                camera_movement[frame_num] = [camera_movement_x, camera_movement_y]
                old_features = cv2.goodFeaturesToTrack(frame_gray, **self.features)

            old_gray = frame_gray.copy()

        return camera_movement

**event_process**

In [5]:
import pandas as pd
import numpy as np

class EventProcessor:
    def __init__(self, tracks, yolo_detections, df):
        """
        Initialize the EventProcessor with tracking data, YOLO detections, and a DataFrame for storing results.
        :param tracks: Dictionary containing the tracking data of players.
        :param yolo_detections: YOLO model results from the prediction (list of lists).
        :param df: A Pandas DataFrame to store player statistics (dribbles, tackles, aerial_duels, injuries).
        """
        self.tracks = tracks
        self.yolo_detections = yolo_detections
        self.df = df  # Reference to the DataFrame

    def find_closest_player(self, event_center, frame_num):
        distances = []
    
        # Iterate over player detections in the given frame
        for player_id, player_data in self.tracks['players'][frame_num].items():
            player_bbox = player_data['bbox']
            player_center = get_center_of_bbox(player_bbox)
            distance = np.linalg.norm(np.array(event_center) - np.array(player_center))
            distances.append((player_id, distance))

        # Sort by distance
        distances.sort(key=lambda x: x[1])

        # Check if distances list is not empty before accessing
        if len(distances) > 0:
            closest_player = distances[0][0]
            return closest_player
        else:
            print(f"No players found in frame {frame_num}.")
            return None  # Handle case where no players are detected

    def find_ball_possessor(self, frame_num):
        """
        Find the player who has possession of the ball in a given frame.
        :param frame_num: The frame number being processed.
        :return: The player ID of the ball possessor or None if no one has the ball.
        """
        if frame_num < 0 or frame_num >= len(self.tracks['players']):
            return None

        for player_id, data in self.tracks['players'][frame_num].items():
            if data.get('has_ball', False):  # Assuming 'has_ball' is True if the player has the ball
                return player_id
        return None

    def update_statistics(self, player_id, column_name):
        """
        Update the specified column in the DataFrame for a given player.
        :param player_id: The ID of the player.
        :param column_name: The column to update (e.g., 'dribble_success', 'injuries').
        """
        initialized_columns = ['dribble_attempt', 'dribble_success', 'dribble_failure', 'dribbled_past',
                               'offensive_success', 'offensive_failure', 'defensive_success', 'defensive_failure',
                               'tackling_success', 'tackling_failure', 'injuries']

        if player_id not in self.df.index or self.df.loc[player_id, initialized_columns].isnull().any():
            self.df.loc[player_id] = {'dribble_attempt': 0, 'dribble_success': 0, 'dribble_failure': 0, 'dribbled_past': 0,
                                      'offensive_success': 0, 'offensive_failure': 0, 'defensive_success': 0, 'defensive_failure': 0,
                                      'Tackles_attempted': 0, 'tackling_success': 0, 'tackling_failure': 0, 'injuries': 0}
        self.df.at[player_id, column_name] += 1

    def group_event_frames(self, event_name, min_gap=20):
        """
        Group consecutive frames for the same event into a list of events, allowing for a gap of up to `min_gap` frames.
    
        :param event_name: The name of the event (e.g., 'dribble', 'tackle', 'aerial_duel', 'injury').
        :param min_gap: Maximum number of frames to allow as a gap between two parts of the same event.
        :return: A list of grouped events, each containing the start and end frame of the event.
        """
        event_class_ids = {
            'dribble': 1,  # Example mapping of event names to class IDs
            'tackle': 6,
            'aerial_duel': 0,
            'injury': 4
        }

        event_class_id = event_class_ids.get(event_name)
        if event_class_id is None:
            return []  # Return empty if the event name is not valid

        grouped_events = []
        ongoing_events = {}  # Dictionary to track ongoing events: {track_id: (start_frame, last_frame)}
        gap_count = {}  # Dictionary to track gaps for each event
    
        # Iterate through the frames and detect the events
        for frame_num, detections in enumerate(self.yolo_detections):
            # Filter out only the detections relevant to the event
            relevant_detections = [detection for detection in detections if int(detection[5]) == event_class_id]

            # Process each event in the frame separately
            for detection in relevant_detections:
                track_id = int(detection[4])  # Assuming detection[4] is the unique identifier for the event (like a player ID)

                # If the event is already ongoing, update its end frame
                if track_id in ongoing_events:
                    ongoing_events[track_id] = (ongoing_events[track_id][0], frame_num)  # Update end frame
                    gap_count[track_id] = 0  # Reset the gap counter for this event
                else:
                    # Start a new event for this track_id with current frame as start_frame and end_frame
                    ongoing_events[track_id] = (frame_num, frame_num)
                    gap_count[track_id] = 0  # Initialize the gap counter for this event

            # Handle gaps for all ongoing events
            for track_id in list(ongoing_events.keys()):  # Use list() to avoid modifying the dictionary while iterating
                if track_id not in [int(detection[4]) for detection in relevant_detections]:
                    # No detection for this track_id in the current frame, increment the gap counter
                    gap_count[track_id] += 1
                    if gap_count[track_id] > min_gap:
                        # End the event if the gap exceeds the allowed size, and append start and end frames
                        start_frame, end_frame = ongoing_events[track_id]
                        grouped_events.append((start_frame, end_frame))
                        del ongoing_events[track_id]  # Remove the completed event
                        del gap_count[track_id]  # Remove the gap counter for this event

        # Append any remaining ongoing events after looping through frames
        for track_id, (start_frame, end_frame) in ongoing_events.items():
            grouped_events.append((start_frame, end_frame))

        return grouped_events


    def process_dribble(self):
        """
        Process grouped dribble events, update dribble attempts and success/failure stats.
        """
        grouped_dribbles = self.group_event_frames('dribble')
        print(len(grouped_dribbles))
        for start_frame, end_frame in grouped_dribbles:
            dribbler = self.find_ball_possessor(start_frame - 1 if start_frame > 0 else start_frame)

            if dribbler is not None:
                self.update_statistics(dribbler, 'dribble_attempt')

                if end_frame + 1 < len(self.tracks['players']):
                    player_with_ball_after = self.find_ball_possessor(end_frame + 1)
                else:
                    player_with_ball_after = None

                if player_with_ball_after == dribbler:
                    self.update_statistics(dribbler, 'dribble_success')
                    event_bbox = self.yolo_detections[end_frame][0][:4]
                    defender = self.find_closest_player(get_center_of_bbox(event_bbox), end_frame)
                    if defender is not None:
                        self.update_statistics(defender, 'dribbled_past')
                else:
                    self.update_statistics(dribbler, 'dribble_failure')

    def process_aerial_duel(self):
        """
        Process grouped aerial duel events, update offensive and defensive success/failure stats.
        """
        grouped_aerial_duels = self.group_event_frames('aerial_duel')

        for start_frame, end_frame in grouped_aerial_duels:
            attacker = self.find_ball_possessor(start_frame - 1 if start_frame > 0 else start_frame)

            if attacker is not None:
                attacker_team = self.tracks['players'][start_frame][attacker]['team']

                if end_frame + 1 < len(self.tracks['players']):
                    player_with_ball_after = self.find_ball_possessor(end_frame + 1)
                else:
                    player_with_ball_after = None

                event_bbox = self.yolo_detections[end_frame][0][:4]
                event_center = get_center_of_bbox(event_bbox)
                defender = self.find_second_closest_opponent(event_center, end_frame, attacker_team)

                if player_with_ball_after == attacker:
                    self.update_statistics(attacker, 'offensive_success')
                    if defender is not None:
                        self.update_statistics(defender, 'defensive_failure')
                else:
                    self.update_statistics(attacker, 'offensive_failure')
                    if defender is not None:
                        self.update_statistics(defender, 'defensive_success')

    def find_second_closest_opponent(self, event_center, frame_num, attacker_team):
        """
        Find the second closest player to the event who is on the opposing team (likely the defender).
        """
        distances = []

        for player_id, player_data in self.tracks['players'][frame_num].items():
            if player_data['team'] != attacker_team:
                player_bbox = player_data['bbox']
                player_center = get_center_of_bbox(player_bbox)
                distance = np.linalg.norm(np.array(event_center) - np.array(player_center))
                distances.append((player_id, distance))

        distances.sort(key=lambda x: x[1])
    
        return distances[1][0] if len(distances) > 1 else None
    
    def process_tackle(self):
        """
        Process grouped tackle events, update tackling success/failure stats.
        """
        grouped_tackles = self.group_event_frames('tackle')

        for start_frame, end_frame in grouped_tackles:
            tackled_player = self.find_ball_possessor(start_frame - 1 if start_frame > 0 else start_frame)

            if tackled_player is not None:
                tackled_team = self.tracks['players'][start_frame][tackled_player]['team']

                if end_frame + 1 < len(self.tracks['players']):
                    player_with_ball_after = self.find_ball_possessor(end_frame + 1)
                else:
                    player_with_ball_after = None

                event_bbox = self.yolo_detections[end_frame][0][:4]
                event_center = get_center_of_bbox(event_bbox)
                tackler = self.find_second_closest_opponent(event_center, end_frame, tackled_team)

                if player_with_ball_after != tackled_player:
                    # Tackle success
                    self.update_statistics(tackler, 'Tackles_attempted')
                    if tackler is not None:
                        self.update_statistics(tackler, 'tackling_success')
                else:
                    # Tackle failure for the tackler
                    if tackler is not None:
                        self.update_statistics(tackler, 'tackling_failure')

    def process_injury(self):
        """
        Process grouped injury events, update injury stats.
        """
        grouped_injuries = self.group_event_frames('injury')

        for start_frame, end_frame in grouped_injuries:
            event_bbox = self.yolo_detections[end_frame][0][:4]
            injured_player = self.find_closest_player(get_center_of_bbox(event_bbox), end_frame)

            if injured_player is not None:
                # Update injury statistics for the injured player
                self.update_statistics(injured_player, 'injuries')

    def process_frames_in_batches(self):
        """
        Process events in batches of frames.
        :param batch_size: The number of frames to process in each batch.
        """

        self.process_dribble()
        self.process_aerial_duel()
        self.process_tackle()
        self.process_injury()

        return self.df

**formation_detector**

In [6]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from collections import Counter

class FormationDetector:
    def __init__(self, tracks, possible_formations, team_df):
        """
        Initialize the FormationDetector with tracking data, a list of possible formations, and a DataFrame.
        :param tracks: Dictionary containing the tracking data of players.
        :param possible_formations: List of possible formations (e.g., ['4-3-3', '4-4-2', '4-2-3-1']).
        :param team_df: DataFrame containing team information where formations will be assigned.
        """
        self.tracks = tracks
        self.possible_formations = possible_formations
        self.team_df = team_df  # DataFrame where formation will be assigned

    def average_positions(self, frame_range, team):
        """
        Compute the average positions of players over a range of frames for a specific team.
        :param frame_range: List of frame numbers to process (e.g., 20 frames).
        :param team: The team for which to compute the average positions.
        :return: A dictionary with player IDs as keys and their average positions (x, y) as values.
        """
        positions = {}

        for frame_num in frame_range:
            for player_id, player_data in self.tracks['players'][frame_num].items():
                if player_data['team'] == team:
                    if player_id not in positions:
                        positions[player_id] = {'x': [], 'y': []}
                    positions[player_id]['x'].append(player_data['position_adjusted'][0])  # Player's x-coordinate
                    positions[player_id]['y'].append(player_data['position_adjusted'][1])  # Player's y-coordinate

        # Compute the average position for each player
        average_positions = {
            player_id: (
                np.mean(positions[player_id]['x']),
                np.mean(positions[player_id]['y'])
            )
            for player_id in positions
        }

        return average_positions

    def cluster_positions(self, average_positions, num_clusters):
        """
        Cluster the players' average positions based on the number of clusters.
        :param average_positions: Dictionary of player IDs and their average positions (x, y).
        :param num_clusters: Number of clusters (defense, midfield, attack, etc.) based on the formation.
        :return: A dictionary with cluster labels (0, 1, 2, ...) as keys and lists of player IDs as values.
        """
        player_ids = list(average_positions.keys())
        positions = np.array(list(average_positions.values()))

        # Use K-Means to cluster players into groups based on the number of clusters required by the formation
        kmeans = KMeans(n_clusters=num_clusters, init="k-means++", n_init='auto')
        labels = kmeans.fit_predict(positions)

        clusters = {i: [] for i in range(num_clusters)}
        for i, player_id in enumerate(player_ids):
            clusters[labels[i]].append(player_id)

        return clusters

    def parse_formation(self, formation):
        """
        Parse a formation string (e.g., '4-2-3-1') into a list of numbers representing players in each group.
        :param formation: A formation string (e.g., '4-2-3-1').
        :return: A list of integers representing the formation (e.g., [4, 2, 3, 1]).
        """
        return [int(num) for num in formation.split('-')]

    def determine_formation(self, clusters, formation):
        """
        Determine if the clusters match the given formation.
        :param clusters: A dictionary with cluster labels (0, 1, 2, ...) and lists of player IDs.
        :param formation: A list of integers representing the expected formation (e.g., [4, 2, 3, 1]).
        :return: Boolean indicating if the detected cluster sizes match the expected formation.
        """
        cluster_sizes = sorted([len(clusters[i]) for i in clusters])

        # Sort the formation as well to avoid ordering issues
        formation_sorted = sorted(formation)

        return cluster_sizes == formation_sorted

    def find_best_formation(self, clusters):
        """
        Find the best matching formation from the list of possible formations.
        :param clusters: A dictionary with cluster labels (0, 1, 2, ...) and lists of player IDs.
        :return: The best matching formation string (e.g., '4-3-3') or 'Unknown' if no match is found.
        """
        for formation in self.possible_formations:
            parsed_formation = self.parse_formation(formation)
            if self.determine_formation(clusters, parsed_formation):
                return formation
        return 'Unknown'

    def find_optimal_cluster_and_formation(self, average_positions):
        """
        Try clustering with 3, 4, and 5 clusters, and find the best matching formation.
        :param average_positions: A dictionary of player IDs and their average positions (x, y).
        :return: The best matching formation and the number of clusters used.
        """
        best_formation = 'Unknown'
        best_num_clusters = 3

        for num_clusters in range(3, 6):  # Try clustering with 3, 4, and 5 clusters
            if len(average_positions) >= num_clusters:
                clusters = self.cluster_positions(average_positions, num_clusters)
                formation = self.find_best_formation(clusters)
                if formation != 'Unknown':
                    best_formation = formation
                    best_num_clusters = num_clusters
                    break  # If a match is found, break out of the loop

        return best_formation, best_num_clusters

    def process_frames_in_batches(self, batch_size=20):
        """
        Process the player formations for each team in batches of frames.
        :param batch_size: Number of frames to process in each batch (default is 20).
        :return: team_df updated with the most common formation for each team.
        """
        total_frames = len(self.tracks['players'])

        # Initialize dictionaries to store formations for each team
        team_formations = {1: [], 2: []}

        for start_frame in range(0, total_frames, batch_size):
            end_frame = min(start_frame + batch_size, total_frames)
            frame_range = list(range(start_frame, end_frame))

            for team in [1, 2]:  
                average_positions = self.average_positions(frame_range, team)

                # Try clustering with 3, 4, and 5 clusters and find the best formation
                best_formation, best_num_clusters = self.find_optimal_cluster_and_formation(average_positions)

                # Collect formations that aren't 'Unknown' for each team
                if best_formation != 'Unknown':
                    team_formations[team].append(best_formation)

        # For each team, find the most common formation and assign it to the team_df
        for team_id in self.team_df.index:
            if team_formations[team_id]:
                most_common_formation = Counter(team_formations[team_id]).most_common(1)[0][0]
            else:
                most_common_formation = 'Unknown'

            print(most_common_formation)
            # Assign the most common formation to the team_df for the respective team (using row index)
            if most_common_formation != 'Unknown':
                self.team_df.at[team_id, 'formations'] = str(most_common_formation)
            
        self.team_df['formations'] = self.team_df['formations'].astype(str)
        
        return self.team_df

**player_number_detector**

In [7]:
import cv2
import pandas as pd
import pytesseract
from ultralytics import YOLO
import numpy as np

class PlayerDetector:
    def __init__(self, number_model_path):
        self.number_model = YOLO(number_model_path)
        self.number_classes = [str(i) for i in range(10)]
    
    def enhance_image(self, image):
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        equalized = clahe.apply(gray)
        gamma = 1.5
        invGamma = 1.0 / gamma
        table = np.array([((i / 255.0) ** invGamma) * 255 for i in np.arange(0, 256)]).astype("uint8")
        enhanced_image = cv2.LUT(equalized, table)
        enhanced_image = cv2.cvtColor(enhanced_image, cv2.COLOR_GRAY2BGR)
        return enhanced_image

    def group_digits(self, number_boxes, number_confidences, number_class_ids, threshold_distance=15):
        # Sort number_boxes and corresponding confidences/class_ids by x1 coordinate to ensure left-to-right reading
        sorted_digits = sorted(zip(number_boxes, number_confidences, number_class_ids), key=lambda x: x[0][0])

        grouped_digits = []
        current_group = ''
        last_x2 = 0
        
        for number_box, number_confidence, number_class_id in sorted_digits:
            nx1, ny1, nx2, ny2 = int(number_box[0]), int(number_box[1]), int(number_box[2]), int(number_box[3])
            
            # Group digits based on proximity in the x-axis
            if nx1 - last_x2 < threshold_distance:
                current_group += str(int(number_class_id))
            else:
                if current_group:
                    grouped_digits.append(current_group)
                current_group = str(int(number_class_id))
            
            last_x2 = nx2
        
        if current_group:
            grouped_digits.append(current_group)
        
        return grouped_digits

    def detect_numbers(self, player_image):
        number_results = self.number_model(player_image)
        number_boxes = number_results[0].boxes.xyxy.cpu().numpy()
        number_confidences = number_results[0].boxes.conf.cpu().numpy()
        number_class_ids = number_results[0].boxes.cls.cpu().numpy()

        # Group nearby digits to form multi-digit numbers, ensuring left-to-right order
        shirt_numbers = self.group_digits(number_boxes, number_confidences, number_class_ids)
        
        return shirt_numbers


class PlayerShirtNumberTracker:
    def __init__(self, video_frames, tracks, df, number_model_path):
        self.video_frames = video_frames
        self.tracks = tracks
        self.df = df
        self.player_detector = PlayerDetector(number_model_path)

    def run(self):
        # Process every 5th frame
        for frame_num, frame in enumerate(self.video_frames):
            if frame_num % 5 != 0:  # Skip frames not divisible by 5
                continue
            
            tracked_players = self.tracks['players'][frame_num]
            
            for player_id, player_data in tracked_players.items():
                bbox = player_data['bbox']  # Use the tracked bounding box
                
                # Convert bounding box coordinates to integers
                x1, y1, x2, y2 = map(int, bbox)

                # Extract the player image using the bounding box
                player_image = frame[y1:y2, x1:x2]
                
                # Run the number detection model on the cropped player image
                shirt_numbers = self.player_detector.detect_numbers(player_image)
                
                if player_id is not None and shirt_numbers:
                    # Assign the detected shirt number(s) to the player in the DataFrame
                    self.df.at[player_id, 'shirt_number'] = shirt_numbers[0] if shirt_numbers else None

        return self.df

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


**pass_detector**

In [8]:
import pandas as pd

class PassDetector:
    def __init__(self, tracks, df):
        """
        Initialize the PassDetector with tracking data of players and the reference to a DataFrame.
        :param tracks: Dictionary containing the tracking data of players and ball positions.
        :param df: A Pandas DataFrame to store pass results with player IDs as rows and columns:
                   'pass_success', 'pass_failure', 'total_passes', 'interceptions'
        """
        self.tracks = tracks
        self.df = df  # Reference to the DataFrame
        self.last_player_with_the_ball = None
        self.last_frame_num = None
        
    def update_pass_results(self, player_id, success, interceptor_id=None):
        """
        Update the pass results for a specific player in the DataFrame.
        :param player_id: The ID of the player making the pass.
        :param success: Boolean indicating if the pass was successful (True) or failed (False).
        :param interceptor_id: ID of the player who intercepted the ball (if pass failed).
        """
        # List of columns that are being initialized by this function
        initialized_columns = ['Pass_Success', 'pass_failure', 'pass_failure', 'Interceptions']

        # Check if player_id exists in df or if any of the initialized columns for the player contain NaN values
        if player_id not in self.df.index or self.df.loc[player_id, initialized_columns].isnull().any():
            self.df.loc[player_id] = {
                'Pass_Success': 0, 
                'pass_failure': 0, 
                'pass_failure': 0, 
                'Interceptions': 0
            }

        if success:
            print("success")
            print(f'{player_id}')
            self.df.at[player_id, 'Pass_Success'] += 1
        else:
            print("failure")
            self.df.at[player_id, 'pass_failure'] += 1

            # If the pass failed, and an interceptor is identified, update their interception count
            if interceptor_id is not None:
                if interceptor_id not in self.df.index or self.df.loc[interceptor_id].isnull().any():
                    self.df.loc[interceptor_id] = {
                        'Pass_Success': 0, 
                        'pass_failure': 0, 
                        'Total_passes': 0, 
                        'Interceptions': 0
                    }
                self.df.at[interceptor_id, 'Interceptions'] += 1

        # Update total passes
        self.df.at[player_id, 'Total_passes'] = self.df.at[player_id, 'Pass_Success'] + self.df.at[player_id, 'pass_failure']

    def detect_pass(self, player_id, player_data, frame_num):
        """
        Detect passes frame by frame for a given player.
        :param player: The player object representing the player who currently has the ball.
        :param frame_num: The current frame number being analyzed.
        :return: None
        """
        if self.last_frame_num:
            current_team = self.tracks['players'][self.last_frame_num][self.last_player_with_the_ball]['team']
        
        # Handle cases where 'has_ball' is not set, assume False if missing
        has_ball = player_data.get('has_ball', False)
        
        # If the player has the ball in this frame
        if has_ball:
            print('has_ball')
            # Check if the ball was passed to a player
            if self.last_player_with_the_ball and self.last_player_with_the_ball != player_id:
                print('pass')
                next_team = self.tracks['players'][frame_num][player_id]['team']
                if self.last_frame_num and current_team == next_team:
                    # Successful pass
                    self.update_pass_results(self.last_player_with_the_ball, success=True)
                else:
                    # Failed pass, interception by the opposing player
                    self.update_pass_results(self.last_player_with_the_ball, success=False, interceptor_id=player_id)

            self.last_player_with_the_ball = player_id
            self.last_frame_num = frame_num
                
    def process_game_in_batches(self, batch_size=20):
        """
        Process the game frame by frame in batches to detect and classify passes.
        :param batch_size: Number of frames to process in each batch.
        :return: Updated DataFrame with pass results
        """
        total_frames = len(self.tracks['players'])
        
        for start_frame in range(0, total_frames, batch_size):
            end_frame = min(start_frame + batch_size, total_frames)  # Ensure we don't go out of bounds
            
            for frame_num in range(start_frame, end_frame):
                players_in_frame = self.tracks['players'][frame_num]
                for player_id, player_data in players_in_frame.items():
                    self.detect_pass(player_id, player_data, frame_num)

        # Return the updated DataFrame after processing
        return self.df

**player_ball_assigner**

In [9]:
class PlayerBallAssigner():
    def __init__(self):
        self.max_player_ball_distance = 70
    
    def assign_ball_to_player(self,players,ball_bbox):
        ball_position = get_center_of_bbox(ball_bbox)

        miniumum_distance = 99999
        assigned_player=-1

        for player_id, player in players.items():
            player_bbox = player['bbox']

            distance_left = measure_distance((player_bbox[0],player_bbox[-1]),ball_position)
            distance_right = measure_distance((player_bbox[2],player_bbox[-1]),ball_position)
            distance = min(distance_left,distance_right)

            if distance < self.max_player_ball_distance:
                if distance < miniumum_distance:
                    miniumum_distance = distance
                    assigned_player = player_id

        return assigned_player

**shot_detector**

In [10]:
import numpy as np

class ShotDetector:
    def __init__(self, tracks, df, team_df, annotations_data, batch_size=60):
        """
        Initialize the ShotDetector with tracking data, a DataFrame for stats, and annotations data.
        :param tracks: Dictionary containing the tracking data of players and the ball.
        :param df: A Pandas DataFrame to store player statistics.
        :param team_df: A Pandas DataFrame to store team statistics.
        :param annotations_data: Dictionary with goal line and goal area points per frame.
        :param batch_size: Number of frames to process in each batch.
        """
        self.tracks = tracks
        self.df = df
        self.team_df = team_df
        self.annotations_data = annotations_data
        self.corners_team_1 = 0
        self.corners_team_2 = 0
        self.player_with_ball_history = []
        self.skip_until_frame = 0  # Tracks when to start processing again after skipping frames
        self.batch_size = batch_size  # Store the batch size to calculate frame skips

    def point_in_goal_area(self, ball_position, frame_num):
        """
        Check if the ball is inside the rectangular goal area for the current frame.
        If no goal polygons are defined for the current frame, check adjacent frames (20 backward and 20 forward).
        """
        x, y = ball_position
        print(f"{x} and {y}")
        # Step 1: Check for goal polygons in the current frame
        goal_polygons = self.annotations_data.get(frame_num, {}).get('goal_points', [])
    
        if goal_polygons:
            print("checking goal")
            # If goal polygons are available for the current frame, check normally
            for goal_area in goal_polygons:
                if len(goal_area) != 4:
                    continue  # Skip non-rectangular areas

                # Extract x and y coordinates
                x_coords = [point[0] for point in goal_area]
                y_coords = [point[1] for point in goal_area]

                # Get min and max bounds
                min_x, max_x = min(x_coords), max(x_coords)
                min_y, max_y = min(y_coords), max(y_coords)
                print(f"{min_x} and {max_x} and {min_y} and {max_y}")

                # Check if the ball is within the bounds
                if min_x <= x <= max_x and min_y <= y <= max_y:
                    return True
        
            # If no goal area contains the ball, return False for the current frame
            return False

        # Step 2: If no goal polygons for the current frame, check adjacent frames
        frame_range = range(frame_num - 60, frame_num + 60)

        for frame in frame_range:
            print("checking goal!")
            # Safely handle missing frames in the dataset
            goal_polygons = self.annotations_data.get(frame, {}).get('goal_points', [])
        
            if not goal_polygons:
                continue  # Skip frames that don't exist or have no goal points

            # Check the goal polygons in the same way as above
            for goal_area in goal_polygons:
                if len(goal_area) != 4:
                    continue  # Skip non-rectangular areas

                # Extract x and y coordinates
                x_coords = [point[0] for point in goal_area]
                y_coords = [point[1] for point in goal_area]

                # Get min and max bounds
                min_x, max_x = min(x_coords), max(x_coords)
                min_y, max_y = min(y_coords), max(y_coords)
                print(f"{min_x} and {max_x} and {min_y} and {max_y}")

                # Check if the ball is within the bounds
                if min_x <= x <= max_x and min_y <= y <= max_y:
                    return True

        # Return False if no goal area contains the ball across all the checked frames
        return False

    def ball_crosses_goal_line(self, ball_position, frame_num):
        """
        Check if the ball has crossed the goal line (used to detect shots).
        """
        x_ball, y_ball = ball_position
        goal_line_points = self.annotations_data[frame_num]['goal_line_points']
        if not goal_line_points:
            return False

        nearest_line = self.get_nearest_goal_line(goal_line_points)
        (x1, y1), (x2, y2) = nearest_line
        cross_product = (x2 - x1) * (y_ball - y1) - (y2 - y1) * (x_ball - x1)
        return cross_product > 0

    def get_nearest_goal_line(self, goal_line_points):
        """
        Get the nearest goal line to the edge of the frame.
        """
        min_distance = float('inf')
        nearest_line = goal_line_points[0]
        for line in goal_line_points:
            x1, y1 = line[0]
            x2, y2 = line[1]
            distance_from_edge = min(x1, x2)
            if distance_from_edge < min_distance:
                min_distance = distance_from_edge
                nearest_line = line
        return nearest_line

    def detect_shots_and_outcomes(self, frame_range):
        """
        Detect shots, goals, assists, key passes, and corners from a range of frames.
        """
        for frame_num in frame_range:
            if frame_num < self.skip_until_frame:
                continue  # Skip processing frames until the skip period is over

            if frame_num not in self.annotations_data:
                continue

            ball_position = self.tracks['ball'][frame_num][1]['bbox']
            ball_position = get_center_of_bbox(ball_position)
            self.track_passes(frame_num)

            current_shooter = self.player_with_ball_history[-1] if self.player_with_ball_history else None

            if current_shooter:
                last_touch_team = self.tracks['players'][frame_num][current_shooter]['team']

                if self.ball_crosses_goal_line(ball_position, frame_num):
                    print("Ball crossed goal line")
                    self.update_statistics(current_shooter, 'Total_shots')
                    if self.point_in_goal_area(ball_position, frame_num):
                        print("GOAL !")
                        self.update_statistics(current_shooter, 'Goals')
                        self.update_statistics(current_shooter, 'Shots_on_Target')
                        assist_player = self.find_assist_player(current_shooter, frame_num)
                        if assist_player:
                            self.update_statistics(assist_player, 'Assists')
                    else:
                        if self.is_saved(current_shooter, frame_num):
                            self.update_statistics(current_shooter, 'Saved_shots')
                            self.update_statistics(current_shooter, 'Shots_on_Target')
                            if self.is_corner(ball_position, frame_num):
                                if last_touch_team == 1:
                                    self.corners_team_2 += 1
                                else:
                                    self.corners_team_1 += 1
                        elif self.is_blocked(current_shooter, frame_num):
                            self.update_statistics(current_shooter, 'Blocked_shots')
                            self.update_statistics(current_shooter, 'Shots_on_Target')
                            if self.is_corner(ball_position, frame_num):
                                if last_touch_team == 1:
                                    self.corners_team_2 += 1
                                else:
                                    self.corners_team_1 += 1
                        else:
                            self.update_statistics(current_shooter, 'Shots_off_Target')

                        key_pass_player = self.find_assist_player(current_shooter, frame_num)
                        if key_pass_player:
                            self.update_statistics(key_pass_player, 'Key_passes')

                    

                    # Skip the next 2 batches of frames after a shot is detected
                    self.skip_until_frame = frame_num + (7 * self.batch_size)
                    print(f"Skipping frames until frame {self.skip_until_frame}")

    def is_corner(self, ball_position, frame_num):
        """
        Determine if a corner should be awarded.
        """
        if self.ball_crosses_goal_line(ball_position, frame_num) and not self.point_in_goal_area(ball_position, frame_num):
            return True
        return False

    def track_passes(self, frame_num):
        """
        Track the passing sequence by detecting when the player who has the ball changes.
        """
        player_with_ball = None
        for player_id, player_data in self.tracks['players'][frame_num].items():
            if player_data.get('has_ball', False):
                player_with_ball = player_id
                break

        if self.player_with_ball_history and self.player_with_ball_history[-1] != player_with_ball:
            self.player_with_ball_history.append(player_with_ball)
            if len(self.player_with_ball_history) > 10:
                self.player_with_ball_history.pop(0)
        elif not self.player_with_ball_history:
            self.player_with_ball_history.append(player_with_ball)

    def find_assist_player(self, current_shooter, frame_num):
        """
        Find the player who made the pass to the current shooter.
        """
        shooter_team = self.tracks['players'][frame_num][current_shooter]['team']

        for player_id in reversed(self.player_with_ball_history[:-1]):
            if player_id is None or player_id not in self.tracks['players'][frame_num]:
                continue
            player_team = self.tracks['players'][frame_num][player_id]['team']
            if player_id == current_shooter:
                continue
            if player_team == shooter_team:
                return player_id
        return None

    def is_saved(self, current_shooter, frame_num):
        """
        Determine if the shot was saved by the goalkeeper.
        """
        shooter_team = self.tracks['players'][frame_num][current_shooter]['team']
        shooter_found = False

        for player_id in reversed(self.player_with_ball_history):
            if player_id is None or player_id not in self.tracks['players'][frame_num]:
                continue
            if player_id == current_shooter:
                shooter_found = True
                continue
            if shooter_found:
                player_team = self.tracks['players'][frame_num][player_id]['team']
                if player_team != shooter_team:
                    self.update_statistics(player_id, 'Clearances')
                    return True
        return False

    def is_blocked(self, current_shooter, frame_num):
        """
        Determine if the shot was blocked by a defender.
        """
        shooter_team = self.tracks['players'][frame_num][current_shooter]['team']
        shooter_found = False

        for player_id in reversed(self.player_with_ball_history):
            if player_id is None or player_id not in self.tracks['players'][frame_num]:
                continue
            if player_id == current_shooter:
                shooter_found = True
                continue
            if shooter_found:
                player_team = self.tracks['players'][frame_num][player_id]['team']
                if player_team != shooter_team:
                    self.update_statistics(player_id, 'Blocked_shots')
                    return True
        return False

    def update_statistics(self, player_id, column_name):
        """
        Update the specified column in the DataFrame for a given player.
        """
        initialized_columns = ['Goals', 'Assists', 'Key_passes', 'Shots_on_Target',
                               'Shots_off_Target', 'Saved_shots', 'Blocked_shots',
                               'Clearances',"Total_shots"]
        if player_id not in self.df.index or self.df.loc[player_id, initialized_columns].isnull().any():
            self.df.loc[player_id] = {col: 0 for col in initialized_columns}

        # Increment the specific column for the player
        self.df.at[player_id, column_name] += 1

    def process_frames_in_batches(self, batch_size=60):
        """
        Process the game data in batches of frames.
        :param batch_size: The number of frames to process in each batch.
        :return: None. The corners are assigned to the team DataFrame.
        """
        total_frames = len(self.tracks['ball'])

        for start_frame in range(0, total_frames, batch_size):
            end_frame = min(start_frame + batch_size, total_frames)
            frame_range = list(range(start_frame, end_frame))

            self.detect_shots_and_outcomes(frame_range)

        # Assign the number of corners to the DataFrame for both teams
        self.team_df.at[1, 'corners'] = self.corners_team_1
        self.team_df.at[2, 'corners'] = self.corners_team_2

        return self.df, self.team_df

**speed_and_distance_estimator**

In [11]:
import cv2
import pandas as pd

class SpeedAndDistance_Estimator():
    def __init__(self):
        self.frame_window = 5
        self.frame_rate = 24
    
    def update_df_with_speed_and_distance(self, tracks, df):
        # List of columns to initialize
        columns_to_initialize = ['Distance_covered', 'Avg_speed', 'Highest_speed']

        # Initialize the columns with zeros (whether they already exist or not)
        for column in columns_to_initialize:
            if column not in df.columns:
                df[column] = 0  # Add the column if it doesn't exist
            else:
                df[column] = df[column].fillna(0)  # Use direct assignment to avoid FutureWarning

        total_distance = {}
        speed_sum = {}
        highest_speed = {}

        # Iterate through each object (player, team, etc.)
        for object, object_tracks in tracks.items():
            if object == "ball" or object == "referees" or object == "goalkeepers":
                continue

            number_of_frames = len(object_tracks)

            # Iterate through frames to compute distance and speed
            for frame_num in range(0, number_of_frames, self.frame_window):
                last_frame = min(frame_num + self.frame_window, number_of_frames - 1)

                for track_id, _ in object_tracks[frame_num].items():
                    if track_id not in object_tracks[last_frame]:
                        continue

                    # Check if 'position_transformed' exists
                    if 'position_transformed' not in object_tracks[frame_num][track_id] or 'position_transformed' not in object_tracks[last_frame][track_id]:
                        print(f"Skipping track {track_id} at frame {frame_num}: 'position_transformed' not found")
                        continue

                    start_position = object_tracks[frame_num][track_id]['position_transformed']
                    end_position = object_tracks[last_frame][track_id]['position_transformed']

                    if start_position is None or end_position is None:
                        continue

                    # Compute distance covered and speed
                    distance_covered = measure_distance(start_position, end_position)
                    time_elapsed = (last_frame - frame_num) / self.frame_rate
                    speed_meters_per_second = distance_covered / time_elapsed
                    speed_km_per_hour = speed_meters_per_second * 3.6

                    # Initialize player stats if not already in the dictionaries
                    if track_id not in total_distance:
                        total_distance[track_id] = 0
                        speed_sum[track_id] = 0
                        highest_speed[track_id] = 0

                    # Update total distance, speed sum, and highest speed for the player
                    total_distance[track_id] += distance_covered
                    speed_sum[track_id] += speed_km_per_hour
                    highest_speed[track_id] = max(highest_speed[track_id], speed_km_per_hour)

            # After processing all frames, calculate average speed for each player
            for track_id in total_distance.keys():
                avg_speed = speed_sum[track_id] / (number_of_frames / self.frame_window)

                # Update the total_distance_covered, avg_speed, and highest_speed in the DataFrame using track_id as the index
                df.at[track_id, 'Distance_covered'] = total_distance[track_id]
                df.at[track_id, 'Avg_speed'] = avg_speed
                df.at[track_id, 'Highest_speed'] = highest_speed[track_id]

        return df

**substitution_detector**

In [12]:
import cv2
import numpy as np
from ultralytics import YOLO

class SubstitutionDetector:
    def __init__(self, class_thresholds, model_path, team_df):
        self.class_thresholds = class_thresholds
        self.team_df = team_df.copy()  # Create a copy of the team DataFrame to avoid SettingWithCopyWarning

        # Load the model using the provided model path
        self.number_model = YOLO(model_path)

    def process_annotations_class_5_only(self, annotations):
        # Flatten annotations (annotations per frame to a single list)
        flattened_annotations = [
            (frame_num, detection) for frame_num, frame_annotations in enumerate(annotations)
            for detection in frame_annotations
        ]

        filtered_detections = []
        for frame_num, detection in flattened_annotations:
            class_id = int(detection[5])
            confidence = detection[4]
            # Only include class 5 with the specified confidence threshold
            if class_id == 5 and confidence >= self.class_thresholds.get(class_id, 0):
                # Append the frame number along with the detection to preserve context
                filtered_detections.append((frame_num, detection))

        return filtered_detections

    def detect_numbers(self, cropped_image):
        # Use the YOLO model to detect numbers from the cropped image directly
        image_rgb = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB)
        image_rgb = np.ascontiguousarray(image_rgb)

        # Detect numbers using the YOLO model
        results = self.number_model(image_rgb)
        df_results = results[0].boxes.data.cpu().numpy()

        detected_numbers = []
        for detection in df_results:
            class_id = int(detection[5])
            confidence = detection[4]
            if confidence >= 0.25:  # Adjusted confidence threshold
                detected_numbers.append(class_id)

        return detected_numbers

    def crop_image(self, image, bbox):
        xmin, ymin, xmax, ymax = map(int, bbox[:4])
        return image[ymin:ymax, xmin:xmax]

    def detect_dominant_color(self, image):
        """Detect whether the cropped image is predominantly red or green."""
        hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        # Define red and green color ranges in HSV
        red_lower1, red_upper1 = np.array([0, 90, 90]), np.array([10, 255, 255])
        red_lower2, red_upper2 = np.array([160, 90, 90]), np.array([180, 255, 255])
        green_lower, green_upper = np.array([40, 50, 50]), np.array([80, 255, 255])

        # Create masks for red and green
        red_mask1 = cv2.inRange(hsv_image, red_lower1, red_upper1)
        red_mask2 = cv2.inRange(hsv_image, red_lower2, red_upper2)
        red_mask = red_mask1 | red_mask2  # Combine red masks
        green_mask = cv2.inRange(hsv_image, green_lower, green_upper)

        # Count the number of red and green pixels
        red_pixels = cv2.countNonZero(red_mask)
        green_pixels = cv2.countNonZero(green_mask)

        # Determine the dominant color based on pixel count
        if red_pixels > green_pixels:
            return "red"
        elif green_pixels > red_pixels:
            return "green"
        else:
            return "unknown"

    def update_substitution(self, team, red_number, green_number):
        """Update the substitution count for the team in the format 'red_number-green_number'."""
        substitution_value = f"{red_number}-{green_number}"
        # Find the first substitution slot that is zero and update it with "red_number-green_number"
        for i in range(1, 6):
            substitution_column = f"substitution_{i}"
            print(substitution_column)
            if self.team_df.loc[team, substitution_column] == 0:
                print(substitution_value)
                self.team_df.loc[team, substitution_column] = substitution_value
                break

    def extract_annotation(self, frames, annotations, tracks):
        ocr_results = {'green': [], 'red': []}  # Separate results for green and red numbers
        red_number = None
        green_number = None

        # Process annotations and handle frame numbers correctly
        detections = self.process_annotations_class_5_only(annotations)

        for frame_num, bbox in detections:
            # Process each detection within the frame
            cropped_image = self.crop_image(frames[frame_num], bbox)
            dominant_color = self.detect_dominant_color(cropped_image)

            if dominant_color in ["green", "red"]:
                detected_numbers = self.detect_numbers(cropped_image)
                if detected_numbers:
                    closest_player_id = self.find_closest_player(bbox, tracks, frame_num)

                    if closest_player_id is None:
                        print(f"No player found close to bbox {bbox} in frame {frame_num}")
                        team = 1
                    else :
                        team = tracks['players'][frame_num][closest_player_id]['team']

                    # Assign detected numbers to red or green
                    if dominant_color == "red" and detected_numbers:
                        red_number = detected_numbers[0]  # Take the first detected red number
                    elif dominant_color == "green" and detected_numbers:
                        green_number = detected_numbers[0]  # Take the first detected green number

                    # If both red and green numbers are detected, update substitution
                    if red_number is not None and green_number is not None:
                        self.update_substitution(team, red_number, green_number)

                    # Store results based on color
                    ocr_results[dominant_color].append(detected_numbers)

        return ocr_results, self.team_df

    def find_closest_player(self, bbox, tracks, frame_num):
        """Find the closest player to the class 5 annotation based on bounding box distance."""
        xmin, ymin, xmax, ymax = map(int, bbox[:4])
        detection_center = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2])

        min_distance = float('inf')
        closest_player_id = None

        for player_id, player_info in tracks['players'][frame_num].items():
            player_bbox = player_info['bbox']
            player_center = np.array([(player_bbox[0] + player_bbox[2]) / 2, (player_bbox[1] + player_bbox[3]) / 2])
            distance = np.linalg.norm(detection_center - player_center)

            if distance < min_distance:
                min_distance = distance
                closest_player_id = player_id

        return closest_player_id

**team_assigner**

In [13]:
from sklearn.cluster import KMeans

class TeamAssigner:
    def __init__(self):
        self.team_colors = {}
        self.player_team_dict = {}
        self.kmeans = None  # Store the last kmeans model

    def get_clustering_model(self, image):
        # Reshape the image to a 2D array
        image_2d = image.reshape(-1, 3)

        # Perform K-means with 2 clusters
        kmeans = KMeans(n_clusters=2, init="k-means++", n_init=1)
        kmeans.fit(image_2d)

        return kmeans

    def get_player_color(self, frame, bbox):
        # Crop the image to the player's bounding box
        image = frame[int(bbox[1]):int(bbox[3]), int(bbox[0]):int(bbox[2])]

        # Use only the top half of the player's image
        top_half_image = image[0:int(image.shape[0] / 2), :]

        # Get the clustering model for the top half
        kmeans = self.get_clustering_model(top_half_image)

        # Get the cluster labels for each pixel
        labels = kmeans.labels_

        # Reshape the labels to match the image shape
        clustered_image = labels.reshape(top_half_image.shape[0], top_half_image.shape[1])

        # Get the player cluster (different from background)
        corner_clusters = [clustered_image[0, 0], clustered_image[0, -1], clustered_image[-1, 0], clustered_image[-1, -1]]
        non_player_cluster = max(set(corner_clusters), key=corner_clusters.count)
        player_cluster = 1 - non_player_cluster

        # Get the color of the player cluster
        player_color = kmeans.cluster_centers_[player_cluster]

        return player_color

    def assign_team_color(self, frame, player_detections):
        player_colors = []

        # Get player colors from each detection
        for _, player_detection in player_detections.items():
            bbox = player_detection["bbox"]
            player_color = self.get_player_color(frame, bbox)
            player_colors.append(player_color)

        if len(player_colors) == 0:
            print("No player colors detected. Skipping KMeans clustering.")
            if self.kmeans is not None:
                print("Using the previous KMeans model.")
            return  # Skip clustering if no colors are detected and use the last KMeans model

        # Create new KMeans clustering model if player colors are detected
        kmeans = KMeans(n_clusters=2, init="k-means++", n_init=10)
        kmeans.fit(player_colors)

        # Save the KMeans model for future use
        self.kmeans = kmeans

        # Store team colors for each cluster
        self.team_colors[1] = kmeans.cluster_centers_[0]
        self.team_colors[2] = kmeans.cluster_centers_[1]

    def get_player_team(self, frame, player_bbox, player_id):
        # Return the team if already assigned
        if player_id in self.player_team_dict:
            return self.player_team_dict[player_id]

        # Get the player's color
        player_color = self.get_player_color(frame, player_bbox)

        # Use the last KMeans model to predict the team
        team_id = self.kmeans.predict(player_color.reshape(1, -1))[0]
        team_id += 1  # Ensure team ID starts from 1

        # Store the player's team for future reference
        self.player_team_dict[player_id] = team_id

        return team_id


**team_process**

In [14]:
import pandas as pd

class TeamProcess:
    def __init__(self, df):
        """
        Initialize the WinLoseProcess class with a DataFrame that contains
        'goals' and 'goals_conceded' columns.
        :param df: Pandas DataFrame containing the match data.
        """
        self.df = df

    def process_win_lose_draw(self):
        """
        Process the DataFrame to add three new columns: 'win', 'lose', 'draw'.
        Based on the comparison of 'goals' and 'goals_conceded' columns, the appropriate
        column (win/lose/draw) will be set to 1.
        """
        # Initialize the columns to 0
        self.df['win'] = 0
        self.df['lose'] = 0
        self.df['draw'] = 0

        # Apply the logic to set the appropriate column to 1 based on goals and goals conceded
        self.df.loc[self.df['goals'] > self.df['goals_conceded'], 'win'] = 1
        self.df.loc[self.df['goals'] < self.df['goals_conceded'], 'lose'] = 1
        self.df.loc[self.df['goals'] == self.df['goals_conceded'], 'draw'] = 1

        return self.df

    def calculate_possession(self):
        """
        Calculate the possession for each team based on the 'completed_passes' column.
        Possession is calculated as the percentage of total completed passes in a match (two rows).
        Adds a new column 'possession' to the DataFrame.
        """
        # Initialize the possession column to 0
        self.df['possession'] = 0

        # Process two rows at a time (assuming each pair of rows is one match)
        for i in range(0, len(self.df), 2):
            # Get the completed passes for both teams in the match
            team_a_passes = self.df.loc[i, 'completed_passes']
            team_b_passes = self.df.loc[i + 1, 'completed_passes']
            
            # Total passes for both teams in the match
            total_passes = team_a_passes + team_b_passes
            
            # Calculate possession for both teams
            self.df.loc[i, 'possession'] = (team_a_passes / total_passes) * 100
            self.df.loc[i + 1, 'possession'] = (team_b_passes / total_passes) * 100

        return self.df

**tracker**

In [15]:
from ultralytics import YOLO
import supervision as sv
import pickle
import os
import pandas as pd
import cv2
import numpy as np

class Tracker:
    def __init__(self, model_path):
        self.model = YOLO(model_path)
        self.tracker = sv.ByteTrack()

    def add_position_to_tracks(self, tracks):
        for object, object_tracks in tracks.items():
            for frame_num, track in enumerate(object_tracks):
                for track_id, track_info in track.items():
                    bbox = track_info['bbox']
                    if object == 'ball':
                        position = get_center_of_bbox(bbox)
                    else:
                        position = get_foot_position(bbox)
                    tracks[object][frame_num][track_id]['position'] = position

    def interpolate_ball_positions(self, ball_positions):
        # Ensure 'bbox' always contains 4 values, even if missing or incomplete
        ball_positions = [
            x.get(1, {}).get('bbox', [None, None, None, None]) 
            for x in ball_positions
        ]

        # Skip rows where all bbox values are None
        ball_positions = [bbox for bbox in ball_positions if any(v is not None for v in bbox)]

        # Check if there's any data left after skipping all-None rows
        if not ball_positions:
            return ball_positions  # or handle empty case as needed

        # Create DataFrame with bounding box coordinates
        df_ball_positions = pd.DataFrame(ball_positions, columns=['x1', 'y1', 'x2', 'y2'])
    
        # Handle cases where 'bbox' contains missing values: interpolate and backfill
        df_ball_positions = df_ball_positions.interpolate().bfill()

        # Convert the DataFrame back to the original format
        ball_positions = [{1: {"bbox": x}} for x in df_ball_positions.to_numpy().tolist()]

        return ball_positions

    def detect_frames(self, frames):
        batch_size = 20
        detections = []
        for i in range(0, len(frames), batch_size):
            detections_batch = self.model.predict(frames[i:i+batch_size], conf=0.1)
            detections += detections_batch
        return detections

    def get_object_tracks(self, frames):
        detections = self.detect_frames(frames)

        tracks = {
            "players": [],
            "referees": [],
            "ball": [],
            "goalkeepers": []
        }

        for frame_num, detection in enumerate(detections):
            cls_names = detection.names
            cls_names_inv = {v: k for k, v in cls_names.items()}

            # Convert to Supervision Detection format
            detection_supervision = sv.Detections.from_ultralytics(detection)

            # Track Objects
            detection_with_tracks = self.tracker.update_with_detections(detection_supervision)

            tracks["players"].append({})
            tracks["referees"].append({})
            tracks["ball"].append({})
            tracks["goalkeepers"].append({})

            for frame_detection in detection_with_tracks:
                bbox = frame_detection[0].tolist()
                cls_id = frame_detection[3]
                track_id = frame_detection[4]

                if cls_id == cls_names_inv['player']:
                    tracks["players"][frame_num][track_id] = {"bbox": bbox}

                if cls_id == cls_names_inv['referee']:
                    tracks["referees"][frame_num][track_id] = {"bbox": bbox}
                    
                if cls_id == cls_names_inv['goalkeeper']:
                    tracks["goalkeepers"][frame_num][track_id] = {"bbox": bbox}

            for frame_detection in detection_supervision:
                bbox = frame_detection[0].tolist()
                cls_id = frame_detection[3]

                if cls_id == cls_names_inv['ball']:
                    tracks["ball"][frame_num][1] = {"bbox": bbox}

        return tracks


**view_transformer**

In [16]:
import numpy as np 
import cv2

class ViewTransformer():
    def __init__(self):
        court_width = 68
        court_length = 23.32

        self.pixel_vertices = np.array([[110, 1035], 
                               [265, 275], 
                               [910, 260], 
                               [1640, 915]])
        
        self.target_vertices = np.array([
            [0,court_width],
            [0, 0],
            [court_length, 0],
            [court_length, court_width]
        ])

        self.pixel_vertices = self.pixel_vertices.astype(np.float32)
        self.target_vertices = self.target_vertices.astype(np.float32)

        self.persepctive_trasnformer = cv2.getPerspectiveTransform(self.pixel_vertices, self.target_vertices)

    def transform_point(self,point):
        p = (int(point[0]),int(point[1]))
        is_inside = cv2.pointPolygonTest(self.pixel_vertices,p,False) >= 0 
        if not is_inside:
            return None

        reshaped_point = point.reshape(-1,1,2).astype(np.float32)
        tranform_point = cv2.perspectiveTransform(reshaped_point,self.persepctive_trasnformer)
        return tranform_point.reshape(-1,2)

    def add_transformed_position_to_tracks(self,tracks):
        for object, object_tracks in tracks.items():
            for frame_num, track in enumerate(object_tracks):
                for track_id, track_info in track.items():
                    position = track_info['position_adjusted']
                    position = np.array(position)
                    position_trasnformed = self.transform_point(position)
                    if position_trasnformed is not None:
                        position_trasnformed = position_trasnformed.squeeze().tolist()
                    tracks[object][frame_num][track_id]['position_transformed'] = position_trasnformed
                    

**bbox_utils**

In [17]:
def get_center_of_bbox(bbox):
    x1,y1,x2,y2 = bbox
    return int((x1+x2)/2),int((y1+y2)/2)

def get_bbox_width(bbox):
    return bbox[2]-bbox[0]

def measure_distance(p1,p2):
    return ((p1[0]-p2[0])**2 + (p1[1]-p2[1])**2)**0.5

def measure_xy_distance(p1,p2):
    return p1[0]-p2[0],p1[1]-p2[1]

def get_foot_position(bbox):
    x1,y1,x2,y2 = bbox
    return int((x1+x2)/2),int(y2)

**goal_and_line_utils**

In [18]:
import cv2
import numpy as np

class GoalAndLineProcessor:
    def __init__(self):
        self.video_writer = None  # Video writer for saving the output video

    def process_annotations(self, video_frames, annotations, output_video_path, fps=60.0):
        frame_height, frame_width = video_frames[0].shape[:2]
        
        # Initialize the video writer to save the output video
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for MP4 video
        self.video_writer = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

        for i, frame in enumerate(video_frames):
            frame_annotations = annotations[i]  # Get annotations for the current frame
            goal_points, goal_line_points = self.extract_goal_and_goal_line(frame, frame_annotations)
            annotated_frame = self.draw_goal_and_goal_line(frame, goal_points, goal_line_points)
            
            # Write the annotated frame to the video
            self.video_writer.write(annotated_frame)

        # Release the video writer after writing all frames
        self.video_writer.release()

    def extract_goal_and_goal_line(self, frame, detections):
        goal_points = []
        goal_line_points = []

        for detection in detections:
            xmin, ymin, xmax, ymax, confidence, class_id = detection[:6]

            # Convert coordinates to integers
            xmin = int(xmin)
            ymin = int(ymin)
            xmax = int(xmax)
            ymax = int(ymax)

            # If the detection class is the goal (assumed class_id == 2)
            if class_id == 2:  
                # Define the goal polygon using the bounding box corners
                goal_points.append([
                    (xmin, ymin),  # Top-left
                    (xmax, ymin),  # Top-right
                    (xmax, ymax),  # Bottom-right
                    (xmin, ymax)   # Bottom-left
                ])
            elif class_id == 3:  # Class 3 is the goal line
                goal_line_points = self.detect_goal_line_points(frame, xmin, ymin, xmax, ymax)

        return goal_points, goal_line_points

    def detect_goal_line_points(self, frame, xmin, ymin, xmax, ymax):
        # Convert ROI to grayscale and apply Gaussian Blur
        gray_roi = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        blurred_roi = cv2.GaussianBlur(gray_roi, (5, 5), 0)

        # Canny edge detection
        edges = cv2.Canny(blurred_roi, 50, 150)

        # Use Hough Transform to find lines
        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=100, maxLineGap=10)

        if lines is not None:
            for line in lines:
                x1, y1, x2, y2 = line[0]
                return [(x1, y1), (x2, y2)]  # Return the start and end points of the line

        return []  # Return empty if no points are found

    def draw_goal_and_goal_line(self, frame, goal_points, goal_line_points):
        # Draw all detected goals as filled polygons
        for points in goal_points:
            pts = np.array(points, np.int32)
            pts = pts.reshape((-1, 1, 2))
            cv2.polylines(frame, [pts], isClosed=True, color=(0, 255, 0), thickness=4)  # Fill polygon in green
            cv2.polylines(frame, [pts], isClosed=True, color=(255, 0, 0), thickness=4)  # Draw outline in blue

        # Draw the goal line
        if len(goal_line_points) == 2:
            pt1, pt2 = goal_line_points
            cv2.line(frame, pt1, pt2, (255, 0, 0), 3)  # Draw line in blue

        return frame  # Return the annotated frame

    # New function to return the frame number, goal points, and goal line points
    def get_goal_and_line_data(self, video_frames, annotations):
        result = {}

        for i, frame in enumerate(video_frames):
            frame_annotations = annotations[i]  # Get annotations for the current frame
            goal_points, goal_line_points = self.extract_goal_and_goal_line(frame, frame_annotations)

            # Ensure goal_line_points is always a list of lists (even if it's empty)
            if goal_line_points:
                goal_line_points = [goal_line_points] 
            # Store the data for each frame
            result[i] = {
                'goal_points': goal_points,
                'goal_line_points': goal_line_points
            }

        return result


**main_utils**

In [19]:
import csv 

def save_tracks_to_csv(tracks, csv_path='output_tracks.csv'):
    """
    Save the tracking data to a CSV file.

    :param tracks: Dictionary containing the tracks for players, referees, ball, and goalkeepers
    :param csv_path: The file path for the output CSV
    """
    with open(csv_path, mode='w', newline='') as file:
        writer = csv.writer(file)
        # Write the CSV header
        writer.writerow(['Frame Number', 'Class Label', 'Track ID', 'Bounding Box (x1, y1, x2, y2)'])

        # Iterate over the tracks for each frame and write to CSV
        for frame_num, (players, referees, ball, goalkeepers) in enumerate(
                zip(tracks['players'], tracks['referees'], tracks['ball'], tracks['goalkeepers'])):

            # Write player tracks
            for track_id, track_info in players.items():
                bbox = track_info['bbox']
                writer.writerow([frame_num, 'player', track_id, bbox])

            # Write referee tracks
            for track_id, track_info in referees.items():
                bbox = track_info['bbox']
                writer.writerow([frame_num, 'referee', track_id, bbox])

            # Write ball tracks
            for track_id, track_info in ball.items():
                bbox = track_info['bbox']
                writer.writerow([frame_num, 'ball', track_id, bbox])

            # Write goalkeeper tracks
            for track_id, track_info in goalkeepers.items():
                bbox = track_info['bbox']
                writer.writerow([frame_num, 'goalkeeper', track_id, bbox])

    print(f"Tracks have been saved to {csv_path}")
    
    
import pandas as pd

def initialize_dataframe(tracks=None):
    """
    Initializes a DataFrame for individual player statistics.
    
    If tracking data is provided, the DataFrame is initialized with unique track IDs.
    If no tracking data is provided, an empty DataFrame with the appropriate columns is returned.
    
    :param tracks: Optional tracking data with players and their actions.
    :return: Pandas DataFrame with player stats initialized to zeros or empty with columns.
    """
    # Define the column names for player statistics
    columns = [
        "shirt_number", "Position", "Goals", "Assists", "Total_shots", 
        "Shots_on_Target", "Shots_off_Target", "Blocked_shots", 
        "Saved_shots", "Total_passes", "pass_failure", 
        "Pass_Success", "Key_passes", "dribble_attempt", 
        "dribble_failure", "dribble_success", "offensive_failure", 
        "defensive_failure", "offensive_success", 
        "defensive_success", "Tackles_attempted", 
        "tackling_failure", "tackling_success", "Clearances", 
        "Interceptions", "injuries", "Distance_covered", 
        "Avg_speed", "Highest_speed", "dribbled_past"
    ]

    # Check if tracks is provided
    if tracks:
        unique_track_ids = set()

        # Extract unique player IDs from tracks
        for frame_num, players in enumerate(tracks['players']):
            for track_id in players.keys():
                unique_track_ids.add(track_id)

        # Initialize DataFrame with unique player IDs as index
        df = pd.DataFrame(index=list(unique_track_ids), columns=columns)
    else:
        # Initialize an empty DataFrame with only the columns
        df = pd.DataFrame(columns=columns)

    return df


def initialize_team_df():
    """
    Initializes a DataFrame with rows for teams 1 and 2, and a single column 'corners'.
    :return: A Pandas DataFrame with team IDs and 'corners' column initialized to 0.
    """
    data = {'corners': [0, 0],'formations': [0, 0],
            'substitution_1': 0,'substitution_2': 0,'substitution_3': 0,
            'substitution_4': 0,'substitution_5': 0}  # Initialize corners to 0 for both teams
    team_df = pd.DataFrame(data, index=[1, 2])  # Create the DataFrame with team IDs as index
    return team_df

def read_video_in_batches(video_reader, start_frame, batch_size):
    """
    Reads a batch of frames from a video.

    :param video_reader: cv2.VideoCapture object used to read the video.
    :param start_frame: The starting frame number for the batch.
    :param batch_size: The number of frames to read in each batch.
    :return: A list of video frames.
    """
    # Set the starting frame position for the batch
    video_reader.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    frames = []
    for _ in range(batch_size):
        ret, frame = video_reader.read()
        if not ret:
            break
        frames.append(frame)
    
    return frames


**recommender_system_utils**

In [20]:
# Helper functions
def euclidean_distance(color1, color2):
    return np.sqrt(np.sum((color1 - color2) ** 2))

def clean_color_string(color_string):
    color_list = color_string.strip("[]").split()  # Split on spaces
    return np.array(color_list, dtype=float)

def find_closest_player_dataset(player_data, target_color, dataset_key):
    non_goalkeeper_data = player_data[player_data['position'] != 'goalkeeper']  # Exclude goalkeepers
    player_colors = non_goalkeeper_data['team_color'].apply(clean_team_color)
    distances = np.array([euclidean_distance(target_color, player_color) for player_color in player_colors])

    # Find the index of the minimum distance
    if len(distances) > 0:
        closest_distance_idx = np.argmin(distances)
        return dataset_key  # Return the dataset key if a valid match is found

def color_distance(row_color, target_color):
    return np.linalg.norm(row_color - target_color)

def find_closest_match(df, target_color):
    df['team_color_array'] = df['team_color'].apply(lambda x: np.fromstring(x.strip("[]"), sep=','))

    df['distance'] = df['team_color_array'].apply(lambda x: color_distance(x, target_color))

    closest_row = df.loc[df['distance'].idxmin()]

    df.drop(columns=['team_color_array', 'distance'], inplace=True)

    return closest_row

# Function to clean and convert the 'Team_Color' string to a NumPy array
def clean_team_color(color_string):
    # Remove any unwanted characters (e.g., commas) and split the string into numeric parts
    cleaned_string = color_string.replace(",", " ").strip("[]")  # Remove commas and brackets
    return np.array(cleaned_string.split(), dtype=float)

def correct_formation_format(value):
    if isinstance(value, str) and '/' in value:  # Check if the value contains '/'
        # Split by '/' assuming it's a wrongly formatted date-like string (e.g., 4/3/2003)
        parts = value.split('/')
        if len(parts) == 3:  # Ensure it's a date-like format
            # Extract the day, month, and last digit of the year to form the correct formation
            defenders = parts[0]  # First part represents the number of defenders
            midfielders = parts[1]  # Second part represents the number of midfielders
            attackers = parts[2][-1]  # Use the last digit of the year as the number of attackers
            return f'{defenders}-{midfielders}-{attackers}'
    return value  # Return the original value if no correction is needed

**Gemini_utils**

In [21]:
# Load the team data from CSV files
def load_data(file_path):
    return pd.read_csv(file_path, encoding='ISO-8859-1')

# Read the system instructions from generate_prompt.py
def read_system_instructions(path):
    with open(path, 'r') as file:
        return file.read()

def send_to_gemini_api_with_retry(prompt, max_retries=5, delay=2):
    """
    Tries to send the prompt to the Gemini API multiple times until it receives valid JSON.
    """
    attempt = 0
    while attempt < max_retries:
        try:
            # Use the correct method to generate the response
            response = model.generate_content(prompt)
            if response:
                # Try to parse the response as JSON
                response_text = response._result.candidates[0].content.parts[0].text
                return json.loads(response_text)  # Attempt to return parsed JSON
            else:
                print("Error: No response from the model.")
        except json.JSONDecodeError:
            print(f"Attempt {attempt+1}/{max_retries}: Response not valid JSON. Retrying...")
        except Exception as e:
            print(f"Error on attempt {attempt+1}: {str(e)}. Retrying...")

        attempt += 1
        time.sleep(delay)

    return None  # If all retries fail, return None

**view_transformer**

In [22]:
import numpy as np
import cv2

class ViewTransformer():
    def __init__(self):
        court_width = 68
        court_length = 23.32

        self.pixel_vertices = np.array([[110, 1035], 
                               [265, 275], 
                               [910, 260], 
                               [1640, 915]])
        
        self.target_vertices = np.array([
            [0, court_width],
            [0, 0],
            [court_length, 0],
            [court_length, court_width]
        ])

        self.pixel_vertices = self.pixel_vertices.astype(np.float32)
        self.target_vertices = self.target_vertices.astype(np.float32)

        self.persepctive_trasnformer = cv2.getPerspectiveTransform(self.pixel_vertices, self.target_vertices)

    def transform_point(self, point):
        p = (int(point[0]), int(point[1]))
        is_inside = cv2.pointPolygonTest(self.pixel_vertices, p, False) >= 0
        if not is_inside:
            return None

        reshaped_point = point.reshape(-1, 1, 2).astype(np.float32)
        tranform_point = cv2.perspectiveTransform(reshaped_point, self.persepctive_trasnformer)
        return tranform_point.reshape(-1, 2)

    def add_transformed_position_to_tracks(self, tracks):
        for object, object_tracks in tracks.items():
            for frame_num, track in enumerate(object_tracks):
                for track_id, track_info in track.items():
                    # Check if 'position_adjusted' exists in track_info
                    if 'position_adjusted' not in track_info:
                        print(f"'position_adjusted' not found for track_id {track_id} in frame {frame_num}. Skipping...")
                        continue

                    position = track_info['position_adjusted']
                    position = np.array(position)
                    position_trasnformed = self.transform_point(position)
                    if position_trasnformed is not None:
                        position_trasnformed = position_trasnformed.squeeze().tolist()

                    # Add the transformed position to the track
                    tracks[object][frame_num][track_id]['position_transformed'] = position_trasnformed


**new_data_handler**

In [23]:
import cv2
from ultralytics import YOLO

class YOLOVideoProcessor:
    def __init__(self, model_path, class_thresholds):
        """
        Initialize the YOLOVideoProcessor with a model and class-specific thresholds.
        :param model_path: Path to the YOLO weights file.
        :param class_thresholds: Dictionary mapping class IDs to confidence thresholds.
        """
        # Load the YOLO model
        self.model = YOLO(model_path)
        # Set the class-specific confidence thresholds
        self.class_thresholds = class_thresholds

    def process_frames_combined(self, video_frames):
        """
        Process the video frames and return two sets of YOLO detections:
        1. Detections for classes 0 to 6 (excluding 2 and 3).
        2. Detections only for class 2 and class 3.
        
        :param video_frames: List of video frames (as images).
        :return: A tuple of two lists:
                 - The first list contains filtered detections for classes 0 to 6 (excluding classes 2 and 3).
                 - The second list contains detections for classes 2 and 3.
                 Each detection is a list with full detection info [xmin, ymin, xmax, ymax, confidence, class_id, ...].
        """
        filtered_detections = []           # List for classes 0 to 6 (excluding 2 and 3)
        detections_for_classes_2_and_3 = []  # List for class 2 and 3

        for frame in video_frames:
            # Run YOLO model on the frame
            results = self.model(frame)

            # Extract bounding box data from the results, move to CPU, then convert to NumPy
            df_results = results[0].boxes.data.cpu().numpy()  # Use .cpu() to move to host memory

            # Filter detections
            frame_filtered_detections = []
            frame_detections_2_and_3 = []
            for detection in df_results:
                class_id = int(detection[5])  # Class ID is the 6th column
                confidence = detection[4]     # Confidence score is the 5th column
                
                # Apply threshold for valid classes (skip if confidence is below the threshold)
                if class_id in self.class_thresholds and confidence >= self.class_thresholds[class_id]:
                    if class_id not in [2, 3]:  # Filter for classes 0 to 6 (excluding 2 and 3)
                        frame_filtered_detections.append(detection.tolist())
                    elif class_id in [2, 3]:    # Filter for class 2 and class 3
                        frame_detections_2_and_3.append(detection.tolist())

            # Append the filtered detections for this frame
            filtered_detections.append(frame_filtered_detections)
            detections_for_classes_2_and_3.append(frame_detections_2_and_3)

        return filtered_detections, detections_for_classes_2_and_3


**player_stats**

In [24]:
import pandas as pd
from collections import Counter

class PlayerStats: 
    def __init__(self, input_df):
        # Initialize the input DataFrame
        self.input_df = input_df
        
        # Define the desired column structure for the final output (for reference only)
        self.columns = [
            'shirtNumber', 'position', 'goals', 'total_shots', 'shots_on_target', 
            'shots_off_target', 'blocked_shots', 'saved_shots', 'total_passes', 
            'accurate_passes', '%_pass_success', 'key_passes', 'dribbles_attempted', 
            'dribbles_past', 'dribbles', '%_dribbles_success', 'aerial_duels', 
            '%_aerial_success', 'offensive_aerials', 'defensive_aerials', 
            'tackles_attempted', 'tackles_won', '%_tackles_success', 'clearances', 
            'interceptions', 'injuries', 'distance_covered', 'avg_speed', 'highest_speed', 
            'team', 'team_color'
        ]

    def process_data(self):
        # Step 1: Drop rows where shirt_number is 0
        self.input_df = self.input_df[self.input_df['shirt_number'] != 0]
        
        # Step 2: Combine rows based on 'shirt_number' and 'team' by summing all columns
        grouped = self.input_df.groupby(['shirt_number', 'team']).agg({
            'Position': 'first',  # Assuming 'Position' does not change for the same 'shirt_number'
            'Goals': 'sum',
            'Assists': 'sum',
            'Total_shots': 'sum',
            'Shots_on_Target': 'sum',
            'Shots_off_Target': 'sum',
            'Blocked_shots': 'sum',
            'Saved_shots': 'sum',
            'Total_passes': 'sum',
            'pass_failure': 'sum',
            'Pass_Success': 'sum',
            'Key_passes': 'sum',
            'dribble_attempt': 'sum',
            'dribble_failure': 'sum',
            'dribble_success': 'sum',
            'offensive_failure': 'sum',
            'defensive_failure': 'sum',
            'offensive_success': 'sum',
            'defensive_success': 'sum',
            'Tackles_attempted': 'sum',
            'tackling_failure': 'sum',
            'tackling_success': 'sum',
            'Clearances': 'sum',
            'Interceptions': 'sum',
            'injuries': 'sum',
            'Distance_covered': 'sum',
            'Avg_speed': lambda x: x[x != 0].mean() if (x != 0).any() else 0,  # Mean of non-zero values or 0
            'Highest_speed': 'max',  # Max of highest_speed
            'dribbled_past': 'sum'
        }).reset_index()

        # Step 3: Assign the team_color column manually
        grouped['team_color'] = grouped.apply(lambda row: self.handle_team_color(self.input_df[(self.input_df['shirt_number'] == row['shirt_number']) & 
                                                                                               (self.input_df['team'] == row['team'])]['team_color'], grouped), axis=1)
    
        print(f"grouped length :{len(grouped)}")

        # Step 4: Create team_1 and team_2 DataFrames
        team_1, team_2 = self.create_teams(grouped)

        return team_1, team_2

    def handle_team_values(self, team_series):
        """
        Handle missing team values: If all team values are 0, assign to team 1.
        """
        # If all values are 0, return team 1
        if (team_series == 0).all():
            return 1
        else:
            # Otherwise, return the most frequent non-zero value
            print(f"team: {Counter([i for i in team_series if i != 0]).most_common(1)[0][0]}")
            return Counter([i for i in team_series if i != 0]).most_common(1)[0][0]

    def handle_team_color(self, team_color_series, grouped_df):
        """
        Handle missing team_color values: Save the first non-zero/non-null value, 
        or return 'Unknown' if no valid colors are found.
        """
        print(f"Processing team color series: {team_color_series}")
        
        # Filter out empty or null values
        valid_colors = team_color_series[team_color_series.notnull() & (team_color_series != 0)]
        
        if len(valid_colors) > 0:
            # Return the first valid color
            print(f"Selected team color: {valid_colors.iloc[0]}")
            return valid_colors.iloc[0]
        else:
            # Return a fallback value if no valid colors are found
            print("No valid team color found, returning 'Unknown'")
            return "Unknown"
        
    def create_teams(self, grouped_df):
        # Split the combined DataFrame into two based on the 'team' column (values 1 and 2)
        team_1 = grouped_df[grouped_df['team'] == 1]
        print(f"team_1 {len(team_1)}")
        team_2 = grouped_df[grouped_df['team'] == 2]
        print(f"team_2 {len(team_2)}")
        # Map columns and return final DataFrames
        team_1_df = self.map_columns(team_1)
        team_2_df = self.map_columns(team_2)
        
        return team_1_df, team_2_df

    def map_columns(self, df):
        # Create a new DataFrame with mapped columns and calculations
        new_df = pd.DataFrame()

        # Direct mappings
        new_df['shirtNumber'] = df['shirt_number']
        new_df['position'] = df['Position']
        new_df['goals'] = df['Goals']
        new_df['total_shots'] = df['Total_shots']
        new_df['shots_on_target'] = df['Shots_on_Target']
        new_df['shots_off_target'] = df['Shots_off_Target']
        new_df['blocked_shots'] = df['Blocked_shots']
        new_df['saved_shots'] = df['Saved_shots']
        new_df['total_passes'] = df['Total_passes']
        new_df['accurate_passes'] = df['Pass_Success']
        new_df['key_passes'] = df['Key_passes']
        new_df['dribbles_attempted'] = df['dribble_attempt']
        new_df['dribbles_past'] = df['dribbled_past']
        new_df['dribbles'] = df['dribble_success']
        new_df['tackles_attempted'] = df['Tackles_attempted']
        new_df['tackles_won'] = df['tackling_success']
        new_df['clearances'] = df['Clearances']
        new_df['interceptions'] = df['Interceptions']
        new_df['injuries'] = df['injuries']
        new_df['distance_covered'] = df['Distance_covered']
        new_df['avg_speed'] = df['Avg_speed']
        new_df['highest_speed'] = df['Highest_speed']
        new_df['team'] = df['team']
        new_df['team_color'] = df['team_color']

        # Calculations
        new_df['%_pass_success'] = (df['Pass_Success'] / df['Total_passes']) * 100
        new_df['%_dribbles_success'] = (df['dribble_success'] / df['dribble_attempt']) * 100
        new_df['aerial_duels'] = (df['offensive_success'] + df['defensive_success'])
        new_df['%_aerial_success'] = new_df['aerial_duels'] / (df['offensive_failure'] + df['defensive_failure'] +
                                      df['offensive_success'] + df['defensive_success']) * 100
        new_df['offensive_aerials'] = df['offensive_success'] + df['offensive_failure']
        new_df['defensive_aerials'] = df['defensive_success'] + df['defensive_failure']
        new_df['%_tackles_success'] = df['tackling_success'] / df['Tackles_attempted'] * 100
        new_df = new_df.fillna(0)
        
        return new_df


**team_stats**

In [25]:
import pandas as pd

class SoccerMatchDataProcessorFullWithSubs:
    def __init__(self, df_team1, df_team2, df_match_info):
        # Store the dataframes for both teams and the match info
        self.df_team1 = df_team1
        self.df_team2 = df_team2
        self.df_match_info = df_match_info

        # Define the columns for the final DataFrame
        self.final_columns = [
            'id_match', 'formations', 'score', 'goals', 'goals_past', 
            'total_shots', 'shots_on_target', 'shots_off_target', 'blocked_shots', 
            'total_possession', 'total_passes', 'accurate_passes', 'pass_success', 
            'key_passes', 'dribbles_attempted', 'dribbles', 'dribbles_past', 'dribbles_success', 
            'aerials_won', 'aerial_success', 'offensive_aerials', 'defensive_aerials', 
            'tackles', 'tackle_success', 'tackles_attempted', 'clearances', 
            'interceptions', 'corners', 'win', 'draw', 'lose', 'team_color',
            'substitution_1', 'substitution_2', 'substitution_3', 'substitution_4', 'substitution_5'
        ]

        # Initialize the final DataFrame
        self.final_df = pd.DataFrame(columns=self.final_columns)

    def calculate_team_stats(self, team_df):
        # Calculate the necessary statistics for a team
        stats = {
            'goals': team_df['goals'].sum(),
            'total_shots': team_df['total_shots'].sum(),
            'shots_on_target': team_df['shots_on_target'].sum(),
            'shots_off_target': team_df['shots_off_target'].sum(),
            'blocked_shots': team_df['blocked_shots'].sum(),
            'total_passes': team_df['total_passes'].sum(),
            'accurate_passes': team_df['accurate_passes'].sum(),
            'pass_success': (team_df['accurate_passes'].sum() / team_df['total_passes'].sum()) * 100 if team_df['total_passes'].sum() > 0 else 0,
            'key_passes': team_df['key_passes'].sum(),
            'dribbles_attempted': team_df['dribbles_attempted'].sum(),
            'dribbles': team_df['dribbles'].sum(),
            'dribbles_past': team_df['dribbles_past'].sum(),
            'dribbles_success': (team_df['dribbles'].sum() / team_df['dribbles_attempted'].sum()) * 100 if team_df['dribbles_attempted'].sum() > 0 else 0,
            'aerials_won': team_df['aerial_duels'].sum(),
            'aerial_success': team_df['%_aerial_success'].mean(),
            'offensive_aerials': team_df['offensive_aerials'].sum(),
            'defensive_aerials': team_df['defensive_aerials'].sum(),
            'tackles': team_df['tackles_won'].sum(),
            'tackle_success': team_df['%_tackles_success'].mean(),
            'tackles_attempted': team_df['tackles_attempted'].sum(),
            'clearances': team_df['clearances'].sum(),
            'interceptions': team_df['interceptions'].sum(),
            'team_color': team_df.loc[team_df['team_color'] != 'Unknown', 'team_color'].iloc[0] if not team_df[team_df['team_color'] != 'Unknown'].empty else 'Unknown'
        }
        return stats

    def process_match_data(self):
        # Process match data for both teams
        
        # Extract team 1 and 2 data from match info and ensure proper casting of formations and substitutions
        team1_info = self.df_match_info.iloc[0]
        team2_info = self.df_match_info.iloc[1]

        # Handle string types for formations and substitutions by ensuring they are strings
        team1_info['formations'] = str(team1_info['formations'])
        team2_info['formations'] = str(team2_info['formations'])

        # Substitution columns may also need to be cast to strings
        substitution_columns = ['substitution_1', 'substitution_2', 'substitution_3', 'substitution_4', 'substitution_5']
        for col in substitution_columns:
            team1_info[col] = str(team1_info[col])
            team2_info[col] = str(team2_info[col])

        # Calculate stats for team 1
        team1_stats = self.calculate_team_stats(self.df_team1)
        team1_stats.update({
            'formations': team1_info['formations'],
            'corners': team1_info['corners'],
            'substitution_1': team1_info['substitution_1'],
            'substitution_2': team1_info['substitution_2'],
            'substitution_3': team1_info['substitution_3'],
            'substitution_4': team1_info['substitution_4'],
            'substitution_5': team1_info['substitution_5'],
            'id_match': 1,  # Assign the match ID
            'win': 0, 'draw': 0, 'lose': 0
        })

        # Calculate stats for team 2
        team2_stats = self.calculate_team_stats(self.df_team2)
        team2_stats.update({
            'formations': team2_info['formations'],
            'corners': team2_info['corners'],
            'substitution_1': team2_info['substitution_1'],
            'substitution_2': team2_info['substitution_2'],
            'substitution_3': team2_info['substitution_3'],
            'substitution_4': team2_info['substitution_4'],
            'substitution_5': team2_info['substitution_5'],
            'id_match': 1,  # Assign the match ID
            'win': 0, 'draw': 0, 'lose': 0
        })

        # Calculate total possession based on total passes
        total_passes = team1_stats['total_passes'] + team2_stats['total_passes']
        team1_stats['total_possession'] = (team1_stats['total_passes'] / total_passes) * 100 if total_passes > 0 else 0
        team2_stats['total_possession'] = (team2_stats['total_passes'] / total_passes) * 100 if total_passes > 0 else 0

        # Determine goals past for each team
        team1_stats['goals_past'] = team2_stats['goals']
        team2_stats['goals_past'] = team1_stats['goals']

        # Assign the score in the format "our goals - their goals"
        team1_stats['score'] = f"{team1_stats['goals']} - {team2_stats['goals']}"
        team2_stats['score'] = f"{team2_stats['goals']} - {team1_stats['goals']}"

        # Determine the result based on goals
        if team1_stats['goals'] > team2_stats['goals']:
            team1_stats['win'] = 1
            team2_stats['lose'] = 1
        elif team1_stats['goals'] < team2_stats['goals']:
            team2_stats['win'] = 1
            team1_stats['lose'] = 1
        else:
            team1_stats['draw'] = 1
            team2_stats['draw'] = 1

        # Use pd.concat instead of append to add the rows
        self.final_df = pd.concat([self.final_df, pd.DataFrame([team1_stats])], ignore_index=True)
        self.final_df = pd.concat([self.final_df, pd.DataFrame([team2_stats])], ignore_index=True)

        # Fill NaN values with 0 for missing data
        self.final_df = self.final_df.fillna(0)

        return self.final_df

**recommender_systems**

In [26]:
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances
from pulp import LpProblem, LpVariable, lpSum, LpMaximize
from collections import Counter
import numpy as np
import difflib

class MyPlayerStats:
    def __init__(self, input_df, correct_shirt_numbers, mobile_data_df):
        self.input_df = input_df
        self.correct_shirt_numbers = correct_shirt_numbers
        self.mobile_data_df = mobile_data_df  # The mobile data DataFrame
        self.columns = [
            'position', 'shirtNumber', 'goals', 'total_shots',
            'shots_on_target', 'shots_off_target', 'blocked_shots', 'saved_shots', 'total_passes', 'accurate_passes',
            '%_pass_success', 'key_passes', 'dribbles_attempted', 'dribbles', '%_dribbles_success', 'dribbles_past',
            'aerial_duels', '%_aerial_success', 'offensive_aerials', 'defensive_aerials', 'tackles_attempted',
            'tackles_won', '%_tackles_success', 'clearances', 'interceptions', 'injuries', 'distance_covered',
            'avg_speed', 'highest_speed'
        ]

    def correct_shirt_numbers_func(self):
        def find_closest_shirt_number(incorrect_number):
            correct_number = difflib.get_close_matches(str(incorrect_number), self.correct_shirt_numbers, n=1, cutoff=0.6)
            return correct_number[0] if correct_number else incorrect_number

        self.input_df['corrected_shirt_number'] = self.input_df['shirtNumber'].apply(find_closest_shirt_number)

    def process_data(self):
        self.correct_shirt_numbers_func()

        # Group the data as before
        grouped = self.input_df.groupby(['corrected_shirt_number']).agg({
            'position': 'first',
            'goals': 'sum',
            'total_shots': 'sum',
            'shots_on_target': 'sum',
            'shots_off_target': 'sum',
            'blocked_shots': 'sum',
            'saved_shots': 'sum',
            'total_passes': 'sum',
            'accurate_passes': 'sum',
            '%_pass_success': lambda x: x[x != 0].mean() if (x != 0).any() else 0,
            'key_passes': 'sum',
            'dribbles_attempted': 'sum',
            'dribbles': 'sum',
            '%_dribbles_success': lambda x: x[x != 0].mean() if (x != 0).any() else 0,
            'dribbles_past': 'sum',
            'aerial_duels': 'sum',
            '%_aerial_success': lambda x: x[x != 0].mean() if (x != 0).any() else 0,
            'offensive_aerials': 'sum',
            'defensive_aerials': 'sum',
            'tackles_attempted': 'sum',
            'tackles_won': 'sum',
            '%_tackles_success': lambda x: x[x != 0].mean() if (x != 0).any() else 0,
            'clearances': 'sum',
            'interceptions': 'sum',
            'injuries': 'sum',
            'distance_covered': 'sum',
            'avg_speed': lambda x: x[x != 0].mean() if (x != 0).any() else 0,
            'highest_speed': 'max',
        }).reset_index()

        # After processing, assign the 'position' from the mobile_data_df
        grouped = self.assign_position_from_mobile_data(grouped)

        return grouped

    def assign_position_from_mobile_data(self, grouped_df):
        # Convert both columns to string for a proper merge
        grouped_df['corrected_shirt_number'] = grouped_df['corrected_shirt_number'].astype(str)
        self.mobile_data_df['Shirt_Number'] = self.mobile_data_df['Shirt_Number'].astype(str)
        # Merge grouped_df with mobile_data_df on the shirt number to get the 'position' from mobile_data_df
        # Ensure 'shirt_number' and 'corrected_shirt_number' columns are used correctly
        grouped_df = grouped_df.merge(self.mobile_data_df[['Shirt_Number', 'Position']],
                                      left_on='corrected_shirt_number', right_on='Shirt_Number',
                                      how='left')

        # Assign the position from the mobile data and drop the temporary column
        grouped_df['position'] = grouped_df['Position']
        grouped_df.drop(columns=['Position'], inplace=True)

        return grouped_df

# Define FirstModel class
class FirstModel:
    def __init__(self, teams, data_cleaned):
        self.teams = teams
        self.data_cleaned = data_cleaned
        self.features_to_compare = [
            'goals', 'goals_past', 'total_shots', 'shots_on_target', 'shots_off_target',
            'blocked_shots', 'total_possession', 'total_passes', 'accurate_passes',
            'pass_success', 'key_passes', 'dribbles_attempted', 'dribbles',
            'dribbles_past', 'dribbles_success', 'aerials_won', 'aerial_success',
            'offensive_aerials', 'defensive_aerials', 'tackles', 'tackle_success',
            'tackles_attempted', 'clearances', 'interceptions', 'corners', 'defenders',
            'midfielders', 'attackers', 'advanced_midfielders'
        ]

    def split_formation(self, df):
        # Apply the function to the 'formations' column
        df['formations'] = df['formations'].apply(correct_formation_format)
        # Split the formations column and fill any null values with the default formation "4-3-3"
        formation_split = df['formations'].fillna('4-3-3').str.split('-')
        
        # Rename the columns in the DataFrame to the original names
        df.rename(columns={
            '%_pass_success': 'pass_success',
            '%_tackles_success': 'tackles_success',
            '%_aerial_success': 'aerial_success',
            '%_dribbles_success': 'dribbles_success'
        }, inplace=True)

        # Ensure missing or invalid values are handled by defaulting to "4-3-3"
        df['defenders'] = formation_split.str[0].fillna(4).astype(int)
        df['midfielders'] = formation_split.str[1].fillna(3).astype(int)
        df['attackers'] = formation_split.str[-1].fillna(3).astype(int)

        # Handle cases where there are advanced midfielders (third value in the split)
        # If the length of the split is less than 3, default to no advanced midfielders (0)
        df['advanced_midfielders'] = formation_split.apply(lambda x: int(x[2]) if len(x) > 3 and pd.notna(x[2]) else 0)

        return df

    def find_similar_rows(self):
        lost_games = self.data_cleaned[self.data_cleaned['lose'] == 1]
        lost_games = self.split_formation(lost_games)

        input_row = self.teams.iloc[0].to_dict()
        input_df = pd.DataFrame([input_row])
        input_df = self.split_formation(input_df)
        filtered_lost_games = lost_games[self.features_to_compare]
        distances = euclidean_distances(filtered_lost_games, input_df[self.features_to_compare])
        lost_games['distance'] = distances

        top_n = 100
        similar_rows = lost_games.sort_values(by='distance').head(top_n)
        similar_rows = similar_rows.drop(columns=['distance'])

        return similar_rows

    def find_winning_rows(self, similar_rows):
        lost_game_ids = similar_rows['id_match'].unique()
        winning_rows = self.data_cleaned[(self.data_cleaned['id_match'].isin(lost_game_ids)) & (self.data_cleaned['win'] == 1)]
        games = winning_rows

        games = self.split_formation(games)

        input_row = self.teams.iloc[1].to_dict()
        input_df = pd.DataFrame([input_row])
        input_df = self.split_formation(input_df)

        filtered_games = games[self.features_to_compare]
        distances = euclidean_distances(filtered_games, input_df[self.features_to_compare])
        games['distance'] = distances

        top_n = 10
        similar_rows = games.sort_values(by='distance').head(top_n)
        similar_rows = similar_rows.drop(columns=['distance'])

        return similar_rows


# Define SecondModel class
class SecondModel:
    def __init__(self, input_row, player_data):
        self.input_row = input_row
        self.player_data = player_data
        self.STATS_TO_MAXIMIZE = {
            'Goalkeeper': ['aerial_success', 'blocked_shots'],
            'Right-back': ['tackles_attempted', 'tackle_success', 'clearances', 'interceptions', 'dribbles_success', 'aerial_success', 'pass_success'],
            'Left-back': ['tackles_attempted', 'tackle_success', 'clearances', 'interceptions', 'dribbles_success', 'aerial_success', 'pass_success'],
            'Center-back': ['tackles_attempted', 'tackle_success', 'clearances', 'interceptions', 'aerial_success', 'pass_success', 'defensive_aerials'],
            'Defensive Midfielder': ['tackles_attempted', 'interceptions', 'pass_success', 'key_passes', 'defensive_aerials'],
            'Central Midfielder': ['total_passes', 'accurate_passes', 'key_passes', 'dribbles_attempted', 'dribbles_success', 'pass_success', 'dribbles'],
            'Attacking Midfielder': ['goals', 'key_passes', 'dribbles_attempted', 'dribbles_success', 'pass_success', 'shots_on_target', 'dribbles', 'offensive_aerials'],
            'Right Winger': ['goals', 'assists', 'key_passes', 'dribbles_attempted', 'dribbles_success', 'crossing_accuracy', 'shots_on_target', 'pass_success', 'dribbles'],
            'Left Winger': ['goals', 'assists', 'key_passes', 'dribbles_attempted', 'dribbles_success', 'crossing_accuracy', 'shots_on_target', 'pass_success', 'dribbles'],
            'Striker': ['goals', 'shots_on_target', 'total_shots', 'key_passes', 'aerial_success', 'dribbles_success', 'pass_success', 'offensive_aerials'],
            'Center Forward': ['goals', 'shots_on_target', 'total_shots', 'key_passes', 'aerial_success', 'dribbles_success', 'pass_success', 'offensive_aerials']
        }
        self.STATS_TO_MINIMIZE = {
            'Goalkeeper': ['goals_past', 'dribbles_past'],
            'Right-back': ['dribbles_past', 'injuries'],
            'Left-back': ['dribbles_past', 'injuries'],
            'Center-back': ['dribbles_past'],
            'Defensive Midfielder': ['dribbles_past', 'injuries'],
            'Central Midfielder': ['injuries'],
            'Attacking Midfielder': ['shots_off_target'],
            'Right Winger': ['shots_off_target', 'injuries'],
            'Left Winger': ['shots_off_target', 'injuries'],
            'Striker': ['shots_off_target', 'injuries'],
            'Center Forward': ['shots_off_target', 'injuries']
        }
        self.ABSENT_FEATURES_TO_MAXIMIZE = {
            'Goalkeeper': ['saved_shots', 'avg_speed', 'highest_speed', 'aerial_duels'],
            'Right-back': ['distance_covered', 'avg_speed', 'highest_speed', 'tackles_won'],
            'Left-back': ['distance_covered', 'avg_speed', 'highest_speed', 'tackles_won'],
            'Center-back': ['distance_covered', 'avg_speed', 'highest_speed', 'tackles_won'],
            'Defensive Midfielder': ['distance_covered', 'aerial_duels', 'tackles_won'],
            'Central Midfielder': ['distance_covered', 'aerial_duels'],
            'Attacking Midfielder': ['highest_speed'],
            'Right Winger': ['highest_speed', 'distance_covered'],
            'Left Winger': ['highest_speed', 'distance_covered'],
            'Striker': ['aerial_duels'],
            'Center Forward': ['aerial_duels']
        }
        self.FEATURE_INDEX_MAP = {
            'goals': 0,
            'total_shots': 1,
            'shots_on_target': 2,
            'shots_off_target': 3,
            'blocked_shots': 4,
            'total_passes': 5,
            'accurate_passes': 6,
            'pass_success': 7,
            'key_passes': 8,
            'dribbles_attempted': 9,
            'dribbles': 10,
            'dribbles_success': 11,
            'dribbles_past': 12,
            'aerial_success': 13,
            'tackles_attempted': 14,
            'tackles_success': 15,
            'clearances': 16,
            'interceptions': 17,
            'offensive_aerials': 18,
            'defensive_aerials': 19,
            'distance_covered': 20,
            'avg_speed': 21,
            'highest_speed': 22,
            'saved_shots': 23,
            'aerial_duels': 24,
            'tackles_won': 25
        }

    def count_players_per_position(self, formation):
        formation_numbers = list(map(int, formation.split('-')))
        position_groups = {
            'defender': ['Right-back', 'Left-back', 'Center-back'],
            'midfielder': ['Defensive Midfielder', 'Central Midfielder', 'Attacking Midfielder', 'Right Winger', 'Left Winger'],
            'forward': ['Striker', 'Center Forward']
        }
        position_counter = Counter()
        num_defenders = formation_numbers[0]
        num_midfielders = formation_numbers[1] if len(formation_numbers) > 1 else 0
        num_midfielders_advanced = formation_numbers[2] if len(formation_numbers) > 3 else 0
        num_forwards = formation_numbers[-1]

        if num_defenders == 4:
            position_counter['Center-back'] = 2
            position_counter['Right-back'] = 1
            position_counter['Left-back'] = 1
        elif num_defenders == 3:
            position_counter['Center-back'] = 3
        elif num_defenders == 5:
            position_counter['Center-back'] = 3
            position_counter['Right-back'] = 1
            position_counter['Left-back'] = 1

        if num_midfielders == 4:
            position_counter['Central Midfielder'] = 2
            position_counter['Right Winger'] = 1
            position_counter['Left Winger'] = 1
        elif num_midfielders == 3:
            position_counter['Central Midfielder'] = 2
            position_counter['Attacking Midfielder'] = 1
        elif num_midfielders == 5:
            position_counter['Central Midfielder'] = 3
            position_counter['Right Winger'] = 1
            position_counter['Left Winger'] = 1
        elif num_midfielders == 2:
            position_counter['Central Midfielder'] = 2
        elif num_midfielders == 1:
            position_counter['Central Midfielder'] = 1

        if num_midfielders_advanced > 0:
            position_counter['Attacking Midfielder'] += num_midfielders_advanced

        if num_forwards == 2:
            position_counter['Striker'] = 1
            position_counter['Center Forward'] = 1
        elif num_forwards == 1:
            position_counter['Striker'] = 1
        elif num_forwards >= 3:
            position_counter['Striker'] = 2
            position_counter['Center Forward'] = 1

        return position_counter

    def get_relevant_feature_indices(self, position):
        max_features = [self.FEATURE_INDEX_MAP[feat] for feat in self.STATS_TO_MAXIMIZE[position] if feat in self.FEATURE_INDEX_MAP]
        min_features = [self.FEATURE_INDEX_MAP[feat] for feat in self.STATS_TO_MINIMIZE[position] if feat in self.FEATURE_INDEX_MAP]
        return max_features, min_features

    def objective(self, player_vars, player_stats, target_stats, max_features, min_features, absent_features):
        team_max_stats = [lpSum(player_vars[i] * player_stats[i][j] for i in range(len(player_vars))) for j in max_features]
        team_min_stats = [lpSum(player_vars[i] * player_stats[i][j] for i in range(len(player_vars))) for j in min_features]
        absent_stats = [lpSum(player_vars[i] * player_stats[i][j] for i in range(len(player_vars))) for j in absent_features]

        maximize_term = lpSum(lpSum(team_max_stats[j] - target_stats[max_features[j]] for j in range(len(max_features))))
        maximize_absent_term = lpSum(absent_stats)
        minimize_term = lpSum(lpSum(target_stats[min_features[j]] - team_min_stats[j] for j in range(len(min_features))))

        return maximize_term + maximize_absent_term - minimize_term

    def recommend_team(self):
        formation = self.input_row['formations']
        position_counts = self.count_players_per_position(formation)
        self.player_data = self.player_data.reset_index(drop=True)

        num_goalkeepers = 1
        num_right_backs = position_counts['Right-back']
        num_left_backs = position_counts['Left-back']
        num_center_backs = position_counts['Center-back']
        num_defensive_midfielders = position_counts['Defensive Midfielder']
        num_central_midfielders = position_counts['Central Midfielder']
        num_attacking_midfielders = position_counts['Attacking Midfielder']
        num_right_wingers = position_counts['Right Winger']
        num_left_wingers = position_counts['Left Winger']
        num_strikers = position_counts['Striker']
        num_center_forwards = position_counts['Center Forward']
        self.player_data.columns = self.player_data.columns.str.lower()

        goalkeepers = self.player_data[self.player_data['position'].str.contains('Goalkeeper')].index
        right_backs = self.player_data[self.player_data['position'].str.contains('Right-back')].index
        left_backs = self.player_data[self.player_data['position'].str.contains('Left-back')].index
        center_backs = self.player_data[self.player_data['position'].str.contains('Center-back')].index
        defensive_midfielders = self.player_data[self.player_data['position'].str.contains('Defensive Midfielder')].index
        central_midfielders = self.player_data[self.player_data['position'].str.contains('Central Midfielder')].index
        attacking_midfielders = self.player_data[self.player_data['position'].str.contains('Attacking Midfielder')].index
        right_wingers = self.player_data[self.player_data['position'].str.contains('Right Winger')].index
        left_wingers = self.player_data[self.player_data['position'].str.contains('Left Winger')].index
        strikers = self.player_data[self.player_data['position'].str.contains('Striker')].index
        center_forwards = self.player_data[self.player_data['position'].str.contains('Center Forward')].index

        player_stats = self.player_data[['goals', 'total_shots', 'shots_on_target', 'shots_off_target', 'blocked_shots',
                                          'total_passes', 'accurate_passes', 'pass_success', 'key_passes', 'dribbles_attempted',
                                          'dribbles', 'dribbles_success', 'dribbles_past', 'aerial_success',
                                          'tackles_attempted', 'tackles_success',
                                          'clearances', 'interceptions', 'offensive_aerials', 'defensive_aerials',
                                          'injuries', 'distance_covered', 'avg_speed', 'highest_speed', 'saved_shots', 'aerial_duels',
                                          'tackles_won']].values

        target_stats = np.array([self.input_row['goals'], self.input_row['total_shots'], self.input_row['shots_on_target'],
                                 self.input_row['shots_off_target'], self.input_row['blocked_shots'], self.input_row['total_passes'],
                                 self.input_row['accurate_passes'], self.input_row['pass_success'], self.input_row['key_passes'], self.input_row['dribbles_attempted'],
                                 self.input_row['dribbles'], self.input_row['dribbles_success'], self.input_row['dribbles_past'],
                                 self.input_row['aerial_success'], self.input_row['tackles_attempted'], self.input_row['tackles_success'],
                                 self.input_row['clearances'], self.input_row['interceptions'], self.input_row['goals_past'],
                                 self.input_row['aerials_won'], self.input_row['offensive_aerials'], self.input_row['defensive_aerials'],
                                 self.input_row['tackles']])

        problem = LpProblem("Team_Selection", LpMaximize)
        player_vars = [LpVariable(f'player_{i}', lowBound=0, cat='Binary') for i in range(len(self.player_data))]

        problem += lpSum([player_vars[i] for i in goalkeepers]) == num_goalkeepers, "Goalkeeper_Constraint"
        problem += lpSum([player_vars[i] for i in right_backs]) == num_right_backs, "Right_Back_Constraint"
        problem += lpSum([player_vars[i] for i in left_backs]) == num_left_backs, "Left_Back_Constraint"
        problem += lpSum([player_vars[i] for i in center_backs]) == num_center_backs, "Center_Back_Constraint"
        problem += lpSum([player_vars[i] for i in defensive_midfielders]) == num_defensive_midfielders, "Defensive_Midfielder_Constraint"
        problem += lpSum([player_vars[i] for i in central_midfielders]) == num_central_midfielders, "Central_Midfielder_Constraint"
        problem += lpSum([player_vars[i] for i in attacking_midfielders]) == num_attacking_midfielders, "Attacking_Midfielder_Constraint"
        problem += lpSum([player_vars[i] for i in right_wingers]) == num_right_wingers, "Right_Winger_Constraint"
        problem += lpSum([player_vars[i] for i in left_wingers]) == num_left_wingers, "Left_Winger_Constraint"
        problem += lpSum([player_vars[i] for i in strikers]) == num_strikers, "Striker_Constraint"
        problem += lpSum([player_vars[i] for i in center_forwards]) == num_center_forwards, "Center_Forward_Constraint"

        injury_limit = 3
        for i in range(len(player_vars)):
            problem += player_vars[i] * self.player_data['injuries'].iloc[i] <= injury_limit, f'Injury_Constraint_{i}'

        positions = {
            'Goalkeeper': goalkeepers,
            'Right-back': right_backs,
            'Left-back': left_backs,
            'Center-back': center_backs,
            'Defensive Midfielder': defensive_midfielders,
            'Central Midfielder': central_midfielders,
            'Attacking Midfielder': attacking_midfielders,
            'Right Winger': right_wingers,
            'Left Winger': left_wingers,
            'Striker': strikers,
            'Center Forward': center_forwards
        }

        for position, indices in positions.items():
            max_features, min_features = self.get_relevant_feature_indices(position)
            absent_features = [self.FEATURE_INDEX_MAP[feat] for feat in self.ABSENT_FEATURES_TO_MAXIMIZE[position] if feat in self.FEATURE_INDEX_MAP]

            if len(indices) > 0:
                problem += self.objective(player_vars, player_stats, target_stats, max_features, min_features, absent_features)

        problem.solve()

        
        selected_players = self.player_data[[player_vars[i].varValue > 0 for i in range(len(self.player_data))]]
        team_stats = selected_players[['goals', 'total_shots', 'shots_on_target', 'shots_off_target', 'blocked_shots', 'saved_shots',
                                           'total_passes', 'accurate_passes', 'pass_success', 'key_passes', 'dribbles_attempted', 'dribbles',
                                           'dribbles_success', 'aerial_duels', 'aerial_success', 'offensive_aerials', 'defensive_aerials',
                                           'tackles_attempted', 'tackles_won', 'tackles_success', 'clearances', 'interceptions', 'injuries',
                                           'distance_covered', 'avg_speed', 'highest_speed']].sum()
        return selected_players, team_stats
       

**Static_page_generation_with_gemini**

In [27]:
import json

def generate_match_summary_prompt(my_team_info_str, opponent_info_str):
    """
    Generates a match summary prompt based on team and opponent data provided as strings.
    
    Args:
        my_team_info_str (str): String containing my team performance data.
        opponent_info_str (str): String containing opponent performance data.
        
    Returns:
        str: The structured match summary prompt formatted as JSON.
    """

    prompt = f"""
    ### Match Summary Request

    You are tasked with providing a comprehensive analysis of the match between my team and the opponent team based on the provided statistics. Use the following information to derive insights and training suggestions.

    **My Team Performance:**
    {my_team_info_str}

    **Opponent Performance:**
    {opponent_info_str}

    ### Output Format:
    Return your response as a JSON object with the following structure:

    {{
      "match_summary": {{
        "my_team_performance": {{
            "formation": "<Extract the formation from my team>",
            "score": "<Extract the score from my team>",
            "goals": <Extract the number of goals from my team>,
            "total_shots": <Extract total shots from my team>,
            "shots_on_target": <Extract shots on target from my team>,
            "total_possession": "<Extract total possession percentage from my team>",
            "accurate_passes": <Extract the number of accurate passes from my team>,
            "pass_success_rate": "<Extract the pass success percentage from my team>",
            "key_passes": <Extract the number of key passes from my team>,
            "dribbles_success_rate": "<Extract successful dribbles>/<attempted dribbles from my team>",
            "tackle_success_rate": "<Extract the tackle success percentage from my team>",
            "corners": <Extract the number of corners from my team>
        }},
        "opponent_performance": {{
            "formation": "<Extract the formation from opponent>",
            "score": "<Extract the score from opponent>",
            "goals": <Extract the number of goals from opponent>,
            "total_shots": <Extract total shots from opponent>,
            "shots_on_target": <Extract shots on target from opponent>,
            "total_possession": "<Extract total possession percentage from opponent>",
            "accurate_passes": <Extract the number of accurate passes from opponent>,
            "pass_success_rate": "<Extract the pass success percentage from opponent>",
            "key_passes": <Extract the number of key passes from opponent>,
            "dribbles_success_rate": "<Extract successful dribbles>/<attempted dribbles from opponent>",
            "tackle_success_rate": "<Extract the tackle success percentage from opponent>",
            "corners": <Extract the number of corners from opponent>
        }}
      }}
    }}

    Please summarize the key statistics for both teams based on the above data, highlight the strengths and weaknesses observed during the match, and provide insights into how each team performed relative to their strategies and tactics. Your response should be structured as specified in the output format.
    """

    return prompt


def generate_player_suggestions_prompt(best_formations_str, match_players_recommendations_str):
    """
    Generates a player suggestions prompt based on recommendations data provided as strings.
    
    Args:
        best_formations_str (str): String containing my team performance data.
        match_players_recommendations_str (str): String containing opponent performance data.
        
    Returns:
        str: The structured player_suggestions_prompt formatted as JSON.
    """

    prompt = f"""
    ### Player suggestions Request

    You are tasked with providing the best formation to play with, which is the first one in the provided data, and also player suggestions
    for the match based on the provided statistics. Use the following information to derive insights.

    **Best Formations:**
    {best_formations_str}

    **Player Suggestions:**
    {match_players_recommendations_str}

    ### Output Format:
    Return your response as a JSON object with the following structure:

    {{
      "recommendations_output": {{
        "best_formation": {{
            "formation": "<Extract the first formation from best_formation_str>"
        }},
        "players_recommendations": [
            {{
                "number": "<Extract the player shirt number from match_players_recommendations_str>",
                "position": "<Extract the position from match_players_recommendations_str>",
                "status": "<Extract the status from match_players_recommendations_str>"
            }},
            {{
                "number": "<Extract the next player shirt number>",
                "position": "<Extract the next player's position>",
                "status": "<Extract the next player's status>"
            }}
            // Repeat for all players in match_players_recommendations_str
        ]
      }}
    }}

    Your response should be structured as specified in the output format.
    """

    return prompt


def generate_opponent_analysis_prompt(opponent_info_str, opponent_players_str):
    """
    Generates a prompt for analyzing the opponent's strengths, weaknesses, and counter-strategies.
    
    Parameters:
        opponent_data (dict): Dictionary containing opponent match statistics.
        opponent_players_str (str): String containing individual data for opponent players.
        
    Returns:
        str: Generated prompt for opponent analysis.
    """
    prompt = f"""
    You are an expert football analyst. Based on the following data about the opponent's recent performances, provide a detailed analysis covering their strengths, weaknesses, and suggest counter-strategies.

    Opponent Data (for the match information):
    {opponent_info_str}


    Opponent Players Data (for each individual player):
    {opponent_players_str}

    Based on this data, analyze:
    1. The strengths of the opponent that can be exploited.
    2. The weaknesses that can be targeted.
    3. Recommended counter-strategies to implement against their play style.

    Provide a comprehensive and actionable analysis.
    """
    return prompt




def generate_training_suggestions_prompt(my_team_players_str, my_team_info_str, opponent_analysis_json):
    """
    Generates a prompt for providing training suggestions based on team players' data, team information,
    and the opponent's analysis (strengths, weaknesses, counter-strategies) as a single string.

    Parameters:
        my_team_players_str (str): String containing the team players' data.
        my_team_info_str (str): String containing the team information.
        opponent_analysis_json (dict): JSON object containing the opponent's analysis.

    Returns:
        str: Generated prompt for training suggestions.
    """
    
    # Convert opponent_analysis_json back to a string for embedding in the prompt
    opponent_analysis_str = json.dumps(opponent_analysis_json, indent=4)

    # Create a structured and detailed prompt using the opponent analysis as a string
    prompt = (
        "You are an expert football coach tasked with improving your team's performance through targeted training sessions. "
        "Please analyze the following data and provide detailed training suggestions in JSON format.\n\n"
        
        "### Team Players Data:\n"
        f"{my_team_players_str}\n\n"
        
        "### Team Information:\n"
        f"{my_team_info_str}\n\n"
        
        "### Opponent Analysis:\n"
        f"{opponent_analysis_str}\n\n"
        
        "### Task:\n"
        "1. Based on the weaknesses identified in the opponent's analysis, suggest a concise training session for the entire team. "
        "This session should specifically address these weaknesses to enhance your team's ability to exploit them during matches.\n"
        "2. For each player, provide a specific actionable drill tailored to their individual strengths and weaknesses, "
        "focusing on how these drills can help exploit the opponent's weaknesses.\n"
        
        "### Output Format:\n"
        "Return your suggestions as a JSON object with the following structure:\n"
        "{\n"
        "  \"team_training_session\": \"<training session suggestion>\",\n"
        "  \"individual_sessions\": [\n"
        "    {\n"
        "      \"player\": \"<player name>\",\n"
        "      \"shirt_number\": <shirt number>,\n"
        "      \"drill\": \"<specific drill for player>\"\n"
        "    },\n"
        "    ...\n"
        "  ]\n"
        "}\n"
        
        "### Notes:\n"
        "Be creative and ensure each suggestion is clear, actionable, and easy to implement.\n"
        "Focus on improving players' skills, tactical awareness, and overall team performance by specifically addressing the opponent's weaknesses."
    )
    
    return prompt

**FINALLY_MAIN**

In [28]:
import pandas as pd
import cv2
import gc

def main():
    # List of video file paths
    video_paths = [
        '/kaggle/input/input-vid/Untitled design.mp4',
        '/kaggle/input/input-vid/input_vid.mp4'
    ]

    # Loop through each video
    for video_index, video_path in enumerate(video_paths):
        # Initialize DataFrames and persistent objects for each video
        df = initialize_dataframe()  # Initialize empty DataFrame for players
        team_df = initialize_team_df()  # Initialize empty DataFrame for teams
        tracker = Tracker('/kaggle/input/old_data.pt/pytorch/default/1/old_data.pt')  # Initialize tracker
        team_assigner = TeamAssigner()
        
        # Set batch size
        batch_size = 200
        video_reader = cv2.VideoCapture(video_path)

        # Get total number of frames in the video
        total_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Define possible formations
        possible_formations = ['4-3-3', '4-2-3-1', '4-3-2-1', '4-1-4-1', '3-5-2', '3-4-1-2', 
                               '4-4-2', '4-4-1-1', '5-4-1', '3-4-3', '4-1-2-1-2', '3-1-4-2', 
                               '3-4-2-1', '4-5-1', '4-3-1-2', '4-2-2-2', '3-5-1-1', '4-1-3-2', 
                               '5-3-2', '3-3-3-1', '4-2-4']
        i = 0
        # Loop through the video, processing batch_size frames at a time
        for start_frame in range(0, total_frames, batch_size):
            i += 1
            # Read a batch of frames
            video_frames = read_video_in_batches(video_reader, start_frame, batch_size)

            # If no frames were read, break the loop
            if len(video_frames) == 0:
                break

            # Process batch: Initialize per-batch objects and perform operations
            tracks = tracker.get_object_tracks(video_frames)  # Get object tracks for batch
            tracker.add_position_to_tracks(tracks)  # Add position to tracks

            # Camera movement estimator
            camera_movement_estimator = CameraMovementEstimator(video_frames[0])
            camera_movement_per_frame = camera_movement_estimator.get_camera_movement(video_frames)
            camera_movement_estimator.add_adjust_positions_to_tracks(tracks, camera_movement_per_frame)

            # Interpolate Ball Positions
            tracks["ball"] = tracker.interpolate_ball_positions(tracks["ball"])

            # View Transformer
            view_transformer = ViewTransformer()
            view_transformer.add_transformed_position_to_tracks(tracks)

            # Speed and Distance Estimation
            speed_and_distance_estimator = SpeedAndDistance_Estimator()
            df = speed_and_distance_estimator.update_df_with_speed_and_distance(tracks, df)

            # Team Assignment
            if i == 1:
                team_assigner.assign_team_color(video_frames[0], tracks['players'][0])
            
            for frame_num, player_track in enumerate(tracks['players']):
                for player_id, track in player_track.items():
                    team = team_assigner.get_player_team(video_frames[frame_num], track['bbox'], player_id)
                    tracks['players'][frame_num][player_id]['team'] = team
                    tracks['players'][frame_num][player_id]['team_color'] = team_assigner.team_colors[team]
                    
                    # Update DataFrame with team and team color
                    if player_id in df.index:
                        df.at[player_id, 'team'] = team
                        df.at[player_id, 'team_color'] = str(team_assigner.team_colors[team])
                    else:
                        df.loc[player_id] = {'team': team, 'team_color': str(team_assigner.team_colors[team])}

            # Ball Assignment
            player_assigner = PlayerBallAssigner()
            for frame_num, player_track in enumerate(tracks['players']):
                if frame_num < len(tracks['ball']):
                    ball_data_for_frame = tracks['ball'][frame_num]
                    if len(ball_data_for_frame) > 0 and 1 in ball_data_for_frame:
                        ball_bbox = ball_data_for_frame[1]['bbox']
                        assigned_player = player_assigner.assign_ball_to_player(player_track, ball_bbox)

                        if assigned_player != -1:
                            tracks['players'][frame_num][assigned_player]['has_ball'] = True

            # Pass Detection
            pass_detector = PassDetector(tracks, df)
            df = pass_detector.process_game_in_batches(batch_size=20)

            # YOLO Processor and Event Processing
            class_thresholds = {0: 0.8, 1: 0.7, 2: 0.3, 3: 0.1, 4: 0.7, 5: 0.6, 6: 0.85}
            yolo_processor = YOLOVideoProcessor('/kaggle/input/new_data.pt/pytorch/default/1/new_data.pt', class_thresholds)
            filtered_detections, detections_classes_2_and_3 = yolo_processor.process_frames_combined(video_frames)
            
            # Detect other events
            event_processor = EventProcessor(tracks, filtered_detections, df)
            df = event_processor.process_frames_in_batches()

            # Process Goal and Line Points
            processor = GoalAndLineProcessor()
            goals_and_lines_annotations = processor.get_goal_and_line_data(video_frames, detections_classes_2_and_3)

            # Detect shots, corners, saves, goals
            shot_detector = ShotDetector(tracks, df, team_df, goals_and_lines_annotations)
            df, team_df = shot_detector.process_frames_in_batches()

            # Initialize OCR
            player_number_tracker = PlayerShirtNumberTracker(video_frames, tracks, df,
                                           '/kaggle/input/ocr/pytorch/default/1/best (2).pt')

            # OCR: Detect player shirt numbers and update DataFrame
            df = player_number_tracker.run()

            # Initialize FormationDetector for each batch
            formation_detector = FormationDetector(tracks, possible_formations, team_df)

            # Formation Detection
            team_df = formation_detector.process_frames_in_batches()
            
            # Initialize SubstitutionDetector
            detector = SubstitutionDetector(class_thresholds, '/kaggle/input/substitution_board/pytorch/default/1/best (3).pt', team_df)
            # Run the extraction process
            ocr_results, team_df = detector.extract_annotation(video_frames, filtered_detections, tracks)
            
            # Delete batch-specific objects and free up memory
            del video_frames, camera_movement_estimator, view_transformer, speed_and_distance_estimator
            del player_assigner, pass_detector, yolo_processor, event_processor, processor, shot_detector
            del filtered_detections, player_number_tracker, formation_detector, detector, ocr_results

            # Force garbage collection
            gc.collect()

            # After processing all batches, fill in any missing data in DataFrames
            df = df.fillna(0)
            df.to_csv(f'/kaggle/working/player_statistics.csv', index=True)
            # Final statistics processing for teams and players
            player_stats = PlayerStats(df)
            team_1_df, team_2_df = player_stats.process_data()
        
            processor = SoccerMatchDataProcessorFullWithSubs(team_1_df, team_2_df, team_df)
            final_df = processor.process_match_data()

            # Save tracks and DataFrames to CSV files with unique names for each video
            output_suffix = f"_video_{video_index+1}"
            save_tracks_to_csv(tracks, csv_path=f'/kaggle/working/tracks_csv{output_suffix}.csv')
            df.to_csv(f'/kaggle/working/player_statistics{output_suffix}.csv', index=True)
            team_df.to_csv(f'/kaggle/working/team_statistics{output_suffix}.csv', index=True)
            team_1_df.to_csv(f'/kaggle/working/team_1_player_statistics{output_suffix}.csv', index=True)
            team_2_df.to_csv(f'/kaggle/working/team_2_player_statistics{output_suffix}.csv', index=True)
            final_df.to_csv(f'/kaggle/working/teams_final_statistics{output_suffix}.csv', index=True)
            print(f"BATCH DONE {i} for video {video_index+1}!")

        print(f"Processing completed for video {video_index+1} - {video_path}")

    print("All videos processed successfully!")

if __name__ == '__main__':
    main()


0: 384x640 2 balls, 18 players, 1 referee, 36.3ms
1: 384x640 2 balls, 16 players, 1 referee, 36.3ms
2: 384x640 2 balls, 16 players, 1 referee, 36.3ms
3: 384x640 4 balls, 17 players, 1 referee, 36.3ms
4: 384x640 3 balls, 16 players, 1 referee, 36.3ms
5: 384x640 4 balls, 17 players, 1 referee, 36.3ms
6: 384x640 1 ball, 16 players, 1 referee, 36.3ms
7: 384x640 2 balls, 15 players, 1 referee, 36.3ms
8: 384x640 2 balls, 15 players, 1 referee, 36.3ms
9: 384x640 1 ball, 16 players, 1 referee, 36.3ms
10: 384x640 3 balls, 16 players, 1 referee, 36.3ms
11: 384x640 3 balls, 16 players, 36.3ms
12: 384x640 2 balls, 17 players, 36.3ms
13: 384x640 2 balls, 15 players, 1 referee, 36.3ms
14: 384x640 2 balls, 15 players, 1 referee, 36.3ms
15: 384x640 1 ball, 15 players, 1 referee, 36.3ms
16: 384x640 1 ball, 15 players, 1 referee, 36.3ms
17: 384x640 2 balls, 16 players, 1 referee, 36.3ms
18: 384x640 2 balls, 17 players, 1 referee, 36.3ms
19: 384x640 1 ball, 15 players, 1 referee, 36.3ms
Speed: 2.7ms pre

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Processing team color series: 9    0
Name: team_color, dtype: object
No valid team color found, returning 'Unknown'
Processing team color series: 2    0
Name: team_color, dtype: object
No valid team color found, returning 'Unknown'
Processing team color series: 99    [     176.54      194.46       185.9]
Name: team_color, dtype: object
Selected team color: [     176.54      194.46       185.9]
Processing team color series: 93    [     43.947      55.794      133.73]
Name: team_color, dtype: object
Selected team color: [     43.947      55.794      133.73]
Processing team color series: 3    0
Name: team_color, dtype: object
No valid team color found, returning 'Unknown'
Processing team color series: 13    0
Name: team_color, dtype: object
No valid team color found, returning 'Unknown'
Processing team color series: 8    0
Name: team_color, dtype: object
No valid team color found, returning 'Unknown'
Processing team color series: 16    0
Name: team_color, dtype: object
No valid team color

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



0: 384x640 1 ball, 1 goalkeeper, 16 players, 1 referee, 34.8ms
1: 384x640 1 goalkeeper, 16 players, 1 referee, 34.8ms
2: 384x640 1 ball, 17 players, 1 referee, 34.8ms
3: 384x640 1 ball, 1 goalkeeper, 19 players, 1 referee, 34.8ms
4: 384x640 1 ball, 1 goalkeeper, 19 players, 1 referee, 34.8ms
5: 384x640 16 players, 1 referee, 34.8ms
6: 384x640 2 balls, 20 players, 34.8ms
7: 384x640 1 ball, 18 players, 1 referee, 34.8ms
8: 384x640 2 balls, 17 players, 1 referee, 34.8ms
9: 384x640 2 balls, 17 players, 1 referee, 34.8ms
10: 384x640 2 balls, 17 players, 1 referee, 34.8ms
11: 384x640 1 ball, 16 players, 1 referee, 34.8ms
12: 384x640 17 players, 1 referee, 34.8ms
13: 384x640 18 players, 1 referee, 34.8ms
14: 384x640 1 ball, 19 players, 1 referee, 34.8ms
15: 384x640 16 players, 1 referee, 34.8ms
16: 384x640 15 players, 1 referee, 34.8ms
17: 384x640 1 ball, 15 players, 1 referee, 34.8ms
18: 384x640 1 ball, 20 players, 1 referee, 34.8ms
19: 384x640 1 ball, 14 players, 1 referee, 34.8ms
Speed: 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



0: 384x640 17 players, 35.8ms
1: 384x640 15 players, 35.8ms
2: 384x640 14 players, 1 referee, 35.8ms
3: 384x640 1 goalkeeper, 14 players, 1 referee, 35.8ms
4: 384x640 1 goalkeeper, 13 players, 35.8ms
5: 384x640 12 players, 1 referee, 35.8ms
6: 384x640 15 players, 35.8ms
7: 384x640 13 players, 35.8ms
8: 384x640 14 players, 1 referee, 35.8ms
9: 384x640 14 players, 35.8ms
10: 384x640 17 players, 35.8ms
11: 384x640 16 players, 35.8ms
12: 384x640 1 ball, 17 players, 35.8ms
13: 384x640 1 ball, 1 goalkeeper, 14 players, 35.8ms
14: 384x640 15 players, 35.8ms
15: 384x640 1 ball, 16 players, 35.8ms
16: 384x640 1 ball, 1 goalkeeper, 16 players, 35.8ms
17: 384x640 1 ball, 1 goalkeeper, 15 players, 35.8ms
18: 384x640 1 ball, 1 goalkeeper, 13 players, 35.8ms
19: 384x640 1 goalkeeper, 15 players, 35.8ms
Speed: 1.5ms preprocess, 35.8ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 goalkeeper, 15 players, 1 referee, 34.8ms
1: 384x640 1 goalkeeper, 15 players, 2 referee

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Processing team color series: 391    [     64.527      70.416      90.623]
Name: team_color, dtype: object
Selected team color: [     64.527      70.416      90.623]
Processing team color series: 531    [     64.527      70.416      90.623]
Name: team_color, dtype: object
Selected team color: [     64.527      70.416      90.623]
Processing team color series: 396    0
439    0
Name: team_color, dtype: object
No valid team color found, returning 'Unknown'
Processing team color series: 388    [     64.527      70.416      90.623]
Name: team_color, dtype: object
Selected team color: [     64.527      70.416      90.623]
Processing team color series: 389    0
Name: team_color, dtype: object
No valid team color found, returning 'Unknown'
Processing team color series: 386    [     170.02      165.84      209.51]
Name: team_color, dtype: object
Selected team color: [     170.02      165.84      209.51]
Processing team color series: 518    [     170.02      165.84      209.51]
Name: team_color

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



0: 384x640 1 goalkeeper, 13 players, 1 referee, 36.9ms
1: 384x640 1 goalkeeper, 12 players, 3 referees, 36.9ms
2: 384x640 1 goalkeeper, 13 players, 2 referees, 36.9ms
3: 384x640 2 goalkeepers, 14 players, 36.9ms
4: 384x640 1 goalkeeper, 14 players, 36.9ms
5: 384x640 1 goalkeeper, 15 players, 36.9ms
6: 384x640 1 goalkeeper, 14 players, 36.9ms
7: 384x640 1 goalkeeper, 15 players, 36.9ms
8: 384x640 1 goalkeeper, 16 players, 36.9ms
9: 384x640 1 goalkeeper, 16 players, 36.9ms
10: 384x640 1 goalkeeper, 15 players, 36.9ms
11: 384x640 1 ball, 1 goalkeeper, 17 players, 36.9ms
12: 384x640 1 ball, 1 goalkeeper, 16 players, 36.9ms
13: 384x640 1 ball, 1 goalkeeper, 16 players, 36.9ms
14: 384x640 2 balls, 1 goalkeeper, 16 players, 36.9ms
15: 384x640 1 ball, 1 goalkeeper, 15 players, 36.9ms
16: 384x640 1 ball, 1 goalkeeper, 15 players, 36.9ms
17: 384x640 1 ball, 1 goalkeeper, 15 players, 36.9ms
18: 384x640 1 ball, 1 goalkeeper, 15 players, 36.9ms
19: 384x640 1 ball, 1 goalkeeper, 16 players, 36.9ms


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



0: 384x640 17 players, 37.4ms
1: 384x640 16 players, 37.4ms
2: 384x640 17 players, 37.4ms
3: 384x640 17 players, 1 referee, 37.4ms
4: 384x640 16 players, 1 referee, 37.4ms
5: 384x640 16 players, 37.4ms
6: 384x640 16 players, 37.4ms
7: 384x640 16 players, 37.4ms
8: 384x640 16 players, 37.4ms
9: 384x640 17 players, 1 referee, 37.4ms
10: 384x640 15 players, 2 referees, 37.4ms
11: 384x640 17 players, 1 referee, 37.4ms
12: 384x640 16 players, 1 referee, 37.4ms
13: 384x640 1 ball, 16 players, 1 referee, 37.4ms
14: 384x640 1 ball, 18 players, 1 referee, 37.4ms
15: 384x640 1 ball, 16 players, 37.4ms
16: 384x640 1 ball, 17 players, 37.4ms
17: 384x640 1 ball, 17 players, 2 referees, 37.4ms
18: 384x640 1 ball, 18 players, 1 referee, 37.4ms
19: 384x640 1 ball, 17 players, 37.4ms
Speed: 1.6ms preprocess, 37.4ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 18 players, 38.5ms
1: 384x640 1 ball, 15 players, 2 referees, 38.5ms
2: 384x640 1 ball, 16 players, 38.5

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [29]:
# Load the teams data (you may need to adjust file paths)
teams1 = pd.read_csv('/kaggle/working/teams_final_statistics_video_1.csv')
teams2 = pd.read_csv('/kaggle/working/teams_final_statistics_video_2.csv')

# Combine teams1 and teams2 into a single DataFrame
combined_teams = pd.concat([teams1, teams2], ignore_index=True)

# Load the data
mobile_data1 = pd.read_csv('/kaggle/input/mobile-data/mobile_data.csv')
mobile_data2 = pd.read_csv('/kaggle/input/mobile-data/mobile_data_2.csv')

correct_shirt_numbers = [str(num) for num in mobile_data1['Shirt_Number']]
correct_shirt_numbers2 = [str(num) for num in mobile_data2['Shirt_Number']]

player_data1 = pd.read_csv('/kaggle/working/team_1_player_statistics_video_1.csv')
player_data2 = pd.read_csv('/kaggle/working/team_2_player_statistics_video_1.csv')
player_data3 = pd.read_csv('/kaggle/working/team_1_player_statistics_video_2.csv')
player_data4 = pd.read_csv('/kaggle/working/team_2_player_statistics_video_2.csv')

player_data_dict = {
    'player_data1': player_data1,
    'player_data2': player_data2,
    'player_data3': player_data3,
    'player_data4': player_data4,
}

# Extract the first team's color from mobile_data1
first_team_color_mobile1 = clean_team_color(mobile_data1.iloc[0]['Team_Color '])

# Extract the opponent's team color from mobile_data2
first_team_color_mobile2 = clean_team_color(mobile_data2.iloc[0]['Team_Color '])

# Handle player data 1 and 2 based on mobile_data1
closest_player_data_1_2 = []
for player_data_key in ['player_data1', 'player_data2']:
    closest_player_dataset_key = find_closest_player_dataset(player_data_dict[player_data_key], first_team_color_mobile1, player_data_key)
    if closest_player_dataset_key:
        closest_player_data_1_2.append(player_data_dict[closest_player_dataset_key])

# Combine the closest data for player 1 and 2 based on mobile_data1
closest_player_data_mobile1 = pd.concat(closest_player_data_1_2, ignore_index=True)

# Process the player stats
player_stats = MyPlayerStats(closest_player_data_mobile1, correct_shirt_numbers, mobile_data1)
closest_player_data_mobile1 = player_stats.process_data()

# Correct the shirt numbers and drop the temporary column
closest_player_data_mobile1['shirt_number'] = closest_player_data_mobile1['corrected_shirt_number']
closest_player_data_mobile1.drop(columns=['corrected_shirt_number'], inplace=True)

closest_player_data_mobile1.to_csv('/kaggle/working/closest_player_data_mobile1.csv', index=False)

# Handle player data 3 and 4 based on mobile_data2
closest_player_data_3_4 = []
for player_data_key in ['player_data3', 'player_data4']:
    closest_player_dataset_key = find_closest_player_dataset(player_data_dict[player_data_key], first_team_color_mobile2, player_data_key)
    if closest_player_dataset_key:
        closest_player_data_3_4.append(player_data_dict[closest_player_dataset_key])

# Combine the closest data for player 3 and 4 based on mobile_data2
closest_player_data_mobile2 = pd.concat(closest_player_data_3_4, ignore_index=True)

# Process the player stats
player_stats = MyPlayerStats(closest_player_data_mobile2, correct_shirt_numbers2, mobile_data2)
closest_player_data_mobile2 = player_stats.process_data()

# Correct the shirt numbers and drop the temporary column
closest_player_data_mobile2['shirt_number'] = closest_player_data_mobile2['corrected_shirt_number']
closest_player_data_mobile2.drop(columns=['corrected_shirt_number'], inplace=True)

closest_player_data_mobile2.to_csv('/kaggle/working/closest_player_data_mobile2.csv', index=False)

# From here on, use only closest_player_data_mobile1 in the rest of the code

# Find the closest matching rows in teams
closest_row_team1 = find_closest_match(combined_teams, first_team_color_mobile1)
opponent_team_color = clean_team_color(mobile_data2.iloc[0]['Team_Color '])
closest_row_team2 = find_closest_match(combined_teams, opponent_team_color)

combined_closest_rows = pd.DataFrame([closest_row_team1, closest_row_team2])

my_team = pd.DataFrame([closest_row_team1])
opponent_team = pd.DataFrame([closest_row_team2])
my_team.to_csv('/kaggle/working/my_team.csv', index=False)
opponent_team.to_csv('/kaggle/working/opponent_team.csv', index=False)

# Run the models
teams = combined_closest_rows
data_cleaned = pd.read_csv('/kaggle/input/recommender-systems-data/data_cleaned.csv')

# Use the closest player data based on mobile data 1
player_data = closest_player_data_mobile1

player_data['shirt_number'] = player_data.pop('Shirt_Number')
player_data['pass_success'] = player_data.pop('%_pass_success')
player_data['dribbles_success'] = player_data.pop('%_dribbles_success')
player_data['aerial_success'] = player_data.pop('%_aerial_success')
player_data['tackles_success'] = player_data.pop('%_tackles_success')

# Initialize the first model
model1 = FirstModel(teams, data_cleaned)

# Find similar rows based on the first model
similar_rows = model1.find_similar_rows()

# Save recommended formations to CSV
recommended_formations = model1.find_winning_rows(similar_rows)
display(similar_rows)
recommended_formations.to_csv('/kaggle/working/recommended_formations.csv', index=False)


# Select the first match data row
match_data = recommended_formations
i = 0
solution = False
while i < 10 and not solution:
    input_row = match_data.iloc[i].to_dict()
    input_row['tackles_success'] = input_row.pop('tackle_success')

    # Initialize the second model to recommend a team based on input row and processed player data
    team_recommender = SecondModel(input_row, player_data)
    selected_players, team_stats = team_recommender.recommend_team()

    # If a team was successfully selected, display the results
    if selected_players is not None:
        solution = True
        selected_players['status'] = 'Starting 11'  # Add a column to indicate starting players

        # Remove the selected players and recommend substitutes
        player_shirt_number_to_remove = selected_players['shirt_number'].tolist()
        player_data_updated = player_data[~player_data['shirt_number'].isin(player_shirt_number_to_remove)]

        substitute_recommender = SecondModel(input_row, player_data_updated)
        selected_substitutes, team_stats = substitute_recommender.recommend_team()

        # If substitutes were successfully selected, display the results
        if selected_substitutes is not None:
            selected_substitutes['status'] = 'Substitute'  # Add a column to indicate substitutes

            # Combine both selected players and substitutes into a single file
            combined_team = pd.concat([selected_players, selected_substitutes], ignore_index=True)
            combined_team.to_csv('/kaggle/working/combined_team.csv', index=False)
        
        else:
            selected_players.to_csv('/kaggle/working/combined_team.csv', index=False)
    i += 1
    display(combined_team)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A val

Unnamed: 0,id_match,formations,score,goals,goals_past,total_shots,shots_on_target,shots_off_target,blocked_shots,total_possession,...,clearances,interceptions,corners,win,draw,lose,defenders,midfielders,attackers,advanced_midfielders
1555,778,5-4-1,0 - 4,0,4,5,0,4,1,19.7,...,28,11,1,0,0,1,5,4,1,0
1425,713,4-3-3,Feb-00,0,2,9,1,4,4,17.6,...,37,14,1,0,0,1,4,3,3,0
2345,1173,4-4-2,0 - 2,0,2,6,1,4,1,22.4,...,30,15,2,0,0,1,4,4,2,0
809,405,5-4-1,May-00,0,5,1,0,1,0,19.6,...,31,12,0,0,0,1,5,4,1,0
950,476,5-3-2,0 - 3,0,3,6,1,2,3,20.8,...,24,6,0,0,0,1,5,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1702,852,4-3-1-2,0 - 2,0,2,9,3,5,1,30.9,...,19,6,5,0,0,1,4,3,2,1
2842,1422,4-4-2,2-Jan,1,2,12,2,8,2,36.5,...,8,8,7,0,0,1,4,4,2,0
1170,586,4-4-2,1-Mar,1,3,8,2,4,2,44.7,...,17,8,3,0,0,1,4,4,2,0
2440,1221,4-4-1-1,1-Feb,1,2,12,7,4,1,39.5,...,29,10,3,0,0,1,4,4,1,1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /opt/conda/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/743a152283054673a22b0c1b60f0a7d5-pulp.mps -max -timeMode elapsed -branch -printingOptions all -solution /tmp/743a152283054673a22b0c1b60f0a7d5-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 25 COLUMNS
At line 57 RHS
At line 78 BOUNDS
At line 88 ENDATA
Problem MODEL has 20 rows, 9 columns and 9 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Problem is infeasible - 0.00 seconds
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /opt/conda/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/b98de70dcea740cc8e552c2cfa36e073-pulp.mps -max -timeMode elapsed -branch -printin

Unnamed: 0,position,goals,total_shots,shots_on_target,shots_off_target,blocked_shots,saved_shots,total_passes,accurate_passes,key_passes,...,injuries,distance_covered,avg_speed,highest_speed,shirt_number,pass_success,dribbles_success,aerial_success,tackles_success,status
0,Central Midfielder,0.0,0.0,0.0,0.0,0.0,0.0,20.0,17.0,0.0,...,0.0,72.175877,8.461598,56.938197,1,84.343434,100.0,0,0,Starting 11
1,Striker,0.0,1.0,0.0,1.0,0.0,0.0,8.0,8.0,0.0,...,0.0,6.660562,5.279565,33.171606,17,100.0,0.0,0,0,Starting 11
2,Left Winger,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,0.0,15.418704,12.432331,31.74731,19,0.0,0.0,0,0,Starting 11
3,Left-back,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,5.014078,3.974462,48.009448,2,0.0,0.0,0,0,Starting 11
4,Goalkeeper,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,20.413358,16.56245,45.716607,4,0.0,0.0,0,0,Starting 11
5,Right Winger,0.0,0.0,0.0,0.0,0.0,0.0,11.0,8.0,0.0,...,0.0,57.427622,7.444123,51.599692,8,72.727273,0.0,0,0,Starting 11
6,Striker,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,8.613292,3.720942,56.378987,7,0.0,0.0,0,0,Substitute
7,Right Winger,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,8.709262,6.903488,42.457556,9,100.0,0.0,0,0,Substitute


In [30]:
import os
import pandas as pd
import google.generativeai as genai
import json
import time

# Set the environment variable in the current notebook session
os.environ["GEMINI_API_KEY"] = "AIzaSyBv4nX97Do78jNAM0Kl5_DFE96qWsBfgbM"

opponent_info = pd.read_csv(r'/kaggle/working/opponent_team.csv')
opponent_info_str = opponent_info.to_string(index=False)

opponent_players = pd.read_csv(r'/kaggle/working/closest_player_data_mobile2.csv')
opponent_players_str = opponent_players.to_string(index=False)

my_team_info = pd.read_csv(r'/kaggle/working/my_team.csv')
my_team_info_str = my_team_info.to_string(index=False)

my_team_players = pd.read_csv(r'/kaggle/working/closest_player_data_mobile1.csv')
my_team_players_str = my_team_players.to_string(index=False)

best_formations = pd.read_csv(r'/kaggle/working/recommended_formations.csv')
best_formations_str = best_formations.to_string(index = False)

match_players_recommendations = pd.read_csv(r'/kaggle/working/combined_team.csv')
match_players_recommendations_str = match_players_recommendations.to_string(index = False)

# Configure the Gemini API key
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Create the model
generation_config = {
    "temperature": 1.0,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 792,
    "response_mime_type": "application/json",
}

model = genai.GenerativeModel(
    model_name="gemini-1.5-flash-002",
    generation_config=generation_config,
    system_instruction=read_system_instructions('/kaggle/input/static-page-generation/generate_prompt.py')
)




def main():
    # Generate the match summary prompt
    match_summary_prompt = generate_match_summary_prompt(my_team_info_str, opponent_info_str)
    match_summary_json = send_to_gemini_api_with_retry(match_summary_prompt)

    if match_summary_json:
        print("Match Summary Result:")
        print(json.dumps(match_summary_json, indent=4))
    else:
        print("Failed to retrieve valid JSON for match summary.")
        
    # Generate the suggestions prompt
    recommendation_prompt = generate_player_suggestions_prompt(best_formations_str, match_players_recommendations_str)
    recommendation_json = send_to_gemini_api_with_retry(recommendation_prompt)

    if recommendation_json:
        print("Recommendation Result:")
        print(json.dumps(recommendation_json, indent=4))
    else:
        print("Failed to retrieve valid JSON for recommendations.")

    # Generate the opponent analysis prompt
    opponent_analysis_prompt = generate_opponent_analysis_prompt(opponent_info_str, opponent_players_str)
    opponent_analysis_json = send_to_gemini_api_with_retry(opponent_analysis_prompt)

    if opponent_analysis_json:
        print("Opponent Analysis Result:")
        print(json.dumps(opponent_analysis_json, indent=4))
    else:
        print("Failed to retrieve valid JSON for opponent analysis.")

    # Generate the training suggestions prompt
    training_suggestions_prompt = generate_training_suggestions_prompt(my_team_players_str, my_team_info_str, opponent_analysis_json)
    training_suggestions_json = send_to_gemini_api_with_retry(training_suggestions_prompt)

    if training_suggestions_json:
        # Prepare the output structure
        output = {
            "team_training_session": training_suggestions_json.get("team_training_session", ""),
            "worst_5_players_individual_sessions": training_suggestions_json.get("individual_sessions", {})[:4] 
        }

        print("Training Suggestions Result:")
        print(json.dumps(output, indent=4))
    else:
        print("Failed to retrieve valid JSON for training suggestions.")

if __name__ == "__main__":
    main()


Match Summary Result:
{
    "match_summary": {
        "my_team_performance": {
            "formation": "0",
            "score": "0.0 - 0.0",
            "goals": 0.0,
            "total_shots": 0.0,
            "shots_on_target": 0.0,
            "total_possession": "0.0",
            "accurate_passes": 7.0,
            "pass_success_rate": "1.0",
            "key_passes": 0.0,
            "dribbles_success_rate": "0.0/0.0",
            "tackle_success_rate": "0.0",
            "corners": 6.0
        },
        "opponent_performance": {
            "formation": "0",
            "score": "0.0 - 0.0",
            "goals": 0.0,
            "total_shots": 0.0,
            "shots_on_target": 0.0,
            "total_possession": "0.0",
            "accurate_passes": 7.0,
            "pass_success_rate": "1.0",
            "key_passes": 0.0,
            "dribbles_success_rate": "0.0/0.0",
            "tackle_success_rate": "0.0",
            "corners": 6.0
        }
    }
}
Recommendation 

**YOLO Inference**

In [27]:
import torch
import cv2
from ultralytics import YOLO

# Load the YOLO models (one with thresholds and one without)
model_with_thresholds = YOLO('/kaggle/input/new_data.pt/pytorch/default/1/new_data.pt')
model_without_thresholds = YOLO('/kaggle/input/old_data.pt/pytorch/default/1/old_data.pt')  # Second model without thresholds

# Define the video input and output
input_video_path = '/kaggle/input/input-vid/Untitled design.mp4'  # Path to your input video
output_video_path = '/kaggle/working/output_video.mp4'  # Path to save the output video

# Open the input video
cap = cv2.VideoCapture(input_video_path)
video_frames = read_video_in_batches(cap, 0, 5000)

yolo_processor = YOLOVideoProcessor('/kaggle/input/new_data.pt/pytorch/default/1/new_data.pt', class_thresholds)
filtered_detections, detections_classes_2_and_3 = yolo_processor.process_frames_combined(video_frames)

# Process Goal and Line Points
processor = GoalAndLineProcessor()
goals_and_lines_annotations = processor.get_goal_and_line_data(video_frames, detections_classes_2_and_3)
processor.process_annotations(video_frames, detections_classes_2_and_3, output_video_path)

cap = cv2.VideoCapture(output_video_path)
output_video_path = '/kaggle/working/output_video_final.mp4'

# Get the video parameters for writing the output video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for output video
fps = int(cap.get(cv2.CAP_PROP_FPS))  # Frames per second
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Initialize video writer for saving the output
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

# Class-specific thresholds (class index as key, confidence threshold as value)
class_thresholds = {
    0: 0.8,  # Example: class 0 threshold (e.g., 'aerial_duel') is 0.8
    1: 0.7,  # Example: class 1 threshold (e.g., 'dribble') is 0.8
    2: 0.3,  # Example: class 2 threshold (e.g., 'goal') is 0.5
    3: 0.1,  # Example: class 3 threshold (e.g., 'goalline') is 0.5
    4: 0.7,  # Example: class 4 threshold (e.g., 'injury') is 0.8
    5: 0.6,  # Example: class 5 threshold (e.g., 'substitution_board') is 0.8
    6: 0.85,  # Example: class 6 threshold (e.g., 'tackle') is 0.8
}

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Run the first YOLO model (with thresholds) on the frame
    results_with_thresholds = model_with_thresholds(frame)
    df_results_with_thresholds = results_with_thresholds[0].boxes.data.cpu().numpy()  # Get detection results as numpy array

    # Filter the detections by class-specific confidence thresholds
    filtered_boxes = []
    for box in df_results_with_thresholds:
        class_id = int(box[5])  # Class ID is the 6th column
        confidence = box[4]  # Confidence score is the 5th column
        if class_id in class_thresholds and confidence >= class_thresholds[class_id]:
            filtered_boxes.append(box)

    # Run the second YOLO model (without thresholds) on the frame
    results_without_thresholds = model_without_thresholds(frame)
    df_results_without_thresholds = results_without_thresholds[0].boxes.data.cpu().numpy()  # Get detection results as numpy array

    # Combine results from both models (but keep the model-specific results separate)
    combined_boxes = filtered_boxes + df_results_without_thresholds.tolist()

    # Manually draw boxes and labels for both sets of results
    for box in filtered_boxes:
        x1, y1, x2, y2, conf, cls_id = box
        label = model_with_thresholds.names[int(cls_id)]  # Get class name from the first model
        color = (0, 255, 0)  # Use green for the first model
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
        cv2.putText(frame, f"{label} {conf:.2f}", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    for box in df_results_without_thresholds:
        x1, y1, x2, y2, conf, cls_id = box
        label = model_without_thresholds.names[int(cls_id)]  # Get class name from the second model
        color = (255, 0, 0)  # Use blue for the second model
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
        cv2.putText(frame, f"{label} {conf:.2f}", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    # Write the processed frame to the output video
    out.write(frame)

# Release the video objects
cap.release()
out.release()


0: 384x640 2 dribbles, 1 goal, 88.9ms
Speed: 9.8ms preprocess, 88.9ms inference, 290.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 balls, 18 players, 1 referee, 54.1ms
Speed: 2.1ms preprocess, 54.1ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 dribbles, 1 goal, 34.4ms
Speed: 3.7ms preprocess, 34.4ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 1 referee, 34.3ms
Speed: 2.9ms preprocess, 34.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 dribbles, 1 goal, 34.4ms
Speed: 3.0ms preprocess, 34.4ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 1 referee, 34.3ms
Speed: 2.9ms preprocess, 34.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 dribbles, 1 goal, 34.3ms
Speed: 2.8ms preprocess, 34.3ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 

In [None]:
import torch
import cv2
from ultralytics import YOLO

# Load the YOLO models (one with thresholds and one without)
model_with_thresholds = YOLO('/kaggle/input/new_data.pt/pytorch/default/1/new_data.pt')
model_without_thresholds = YOLO('/kaggle/input/old_data.pt/pytorch/default/1/old_data.pt')  # Second model without thresholds

# Define the video input and output
input_video_path = '/kaggle/input/input-vid/Untitled design.mp4'  # Path to your input video
output_video_path = '/kaggle/working/output_video.mp4'  # Intermediate output video path

# Open the input video
cap = cv2.VideoCapture(input_video_path)

# Initialize an empty list to store the video frames
video_frames = []

# Loop through the video and read all frames
while True:
    ret, frame = cap.read()  # Read a frame
    if not ret:
        break  # If no more frames are available, exit the loop
    video_frames.append(frame)  # Append the frame to the list

# Release the video capture object
cap.release()

print(len(video_frames))

# Class-specific thresholds (class index as key, confidence threshold as value)
class_thresholds = {
    0: 0.8,  # Example: class 0 threshold (e.g., 'aerial_duel') is 0.8
    1: 0.7,  # Example: class 1 threshold (e.g., 'dribble') is 0.7
    2: 0.3,  # Example: class 2 threshold (excluded)
    3: 0.1,  # Example: class 3 threshold (excluded)
    4: 0.7,  # Example: class 4 threshold (e.g., 'injury') is 0.7
    5: 0.6,  # Example: class 5 threshold (e.g., 'substitution_board') is 0.6
    6: 0.85,  # Example: class 6 threshold (e.g., 'tackle') is 0.85
}

# YOLO processor and filtering based on thresholds
yolo_processor = YOLOVideoProcessor('/kaggle/input/new_data.pt/pytorch/default/1/new_data.pt', class_thresholds)
filtered_detections, detections_classes_2_and_3 = yolo_processor.process_frames_combined(video_frames)

# Process Goal and Line Points (dummy function since implementation is placeholder)
processor = GoalAndLineProcessor()
goals_and_lines_annotations = processor.get_goal_and_line_data(video_frames, detections_classes_2_and_3)
processor.process_annotations(video_frames, detections_classes_2_and_3, output_video_path, fps=30.0)

# Open processed video for final output
cap = cv2.VideoCapture(output_video_path)
output_video_path_final = '/kaggle/working/output_video_final.mp4'

# Get the video parameters for writing the output video
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Codec for output video
fps = int(cap.get(cv2.CAP_PROP_FPS))  # Frames per second
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Initialize video writer for saving the output
out = cv2.VideoWriter(output_video_path_final, fourcc, fps, (width, height))

# Define colors for each class for results with thresholds
class_colors_with_thresholds = {
    0: (0, 255, 0),    # Green for class 0
    1: (255, 0, 0),    # Blue for class 1
    4: (0, 0, 255),    # Red for class 4
    5: (255, 255, 0),  # Cyan for class 5
    6: (255, 0, 255)   # Magenta for class 6
}

# Define colors for results without thresholds
class_colors_without_thresholds = {
    0: (0, 255, 0),    # Teal for class 0
    1: (255, 0, 0),    # Purple for class 1
    2: (0, 0, 255),    # Olive for class 4
    3: (255, 255, 0),  # Light Green for class 5
    4: (255, 0, 255)   # Violet for class 6
}

# Process each frame and draw the bounding boxes
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Run the first YOLO model (with thresholds) on the frame
    results_with_thresholds = model_with_thresholds(frame)
    df_results_with_thresholds = results_with_thresholds[0].boxes.data.cpu().numpy()  # Get detection results as numpy array

    # Filter the detections by class-specific confidence thresholds, excluding classes 2 and 3
    filtered_boxes = []
    for box in df_results_with_thresholds:
        class_id = int(box[5])  # Class ID is the 6th column
        confidence = box[4]  # Confidence score is the 5th column
        if class_id in class_thresholds and confidence >= class_thresholds[class_id] and class_id not in [2, 3]:
            filtered_boxes.append(box)

    # Run the second YOLO model (without thresholds) on the frame
    results_without_thresholds = model_without_thresholds(frame)
    df_results_without_thresholds = results_without_thresholds[0].boxes.data.cpu().numpy()  # Get detection results as numpy array

    # Manually draw boxes and labels for the first set of results (with thresholds)
    for box in filtered_boxes:
        x1, y1, x2, y2, conf, cls_id = box
        label = model_with_thresholds.names[int(cls_id)]  # Get class name from the first model
        color = class_colors_with_thresholds.get(int(cls_id), (0, 255, 255))  # Default color if not defined

        # Check if the class is 1 and the height of the bounding box is less than 300
        if cls_id == 1 and (y2 - y1) < 170:
            # Draw the bounding box and label
            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
            cv2.putText(frame, f"{label} {conf:.2f}", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    # Manually draw boxes and labels for the second set of results (without thresholds)
    for box in df_results_without_thresholds:
        x1, y1, x2, y2, conf, cls_id = box
        label = model_without_thresholds.names[int(cls_id)]  # Get class name from the second model
        color = class_colors_without_thresholds.get(int(cls_id), (0, 128, 128))  # Default color if not defined
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
        cv2.putText(frame, f"{label} {conf:.2f}", (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

    # Write the processed frame to the output video
    out.write(frame)

# Release the video objects
cap.release()
out.release()

509

0: 384x640 2 dribbles, 1 goal, 64.5ms
Speed: 2.4ms preprocess, 64.5ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 dribbles, 1 goal, 62.7ms
Speed: 2.9ms preprocess, 62.7ms inference, 1.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 7 dribbles, 1 goal, 43.4ms
Speed: 2.0ms preprocess, 43.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 dribbles, 1 goal, 38.7ms
Speed: 2.0ms preprocess, 38.7ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 dribbles, 1 goal, 38.4ms
Speed: 1.9ms preprocess, 38.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 dribbles, 1 goal, 38.4ms
Speed: 2.0ms preprocess, 38.4ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 dribbles, 1 goal, 36.1ms
Speed: 1.9ms preprocess, 36.1ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 dribbles, 1 goal, 36.0ms
Speed

**END_POINTS**

In [32]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import os
import pandas as pd
import json
import google.generativeai as genai
import uvicorn
import cv2
import gc

# Initialize the FastAPI app
app = FastAPI()

# Global variable to store video paths and JSON outputs
json_outputs = {}

# Pydantic model to validate input data for POST request
class VideoPaths(BaseModel):
    video_paths: list[str]

# Define the process_videos function that contains your detailed processing logic
def process_videos(video_paths):
    global json_outputs
    # Clear previous outputs
    json_outputs = {}
    
    # Loop through each video path
    for video_index, video_path in enumerate(video_paths):
        df = initialize_dataframe()  # Initialize DataFrame for players
        team_df = initialize_team_df()  # Initialize DataFrame for teams
        tracker = Tracker('/kaggle/input/old_data.pt/pytorch/default/1/old_data.pt')  # Initialize tracker
        team_assigner = TeamAssigner()
        
        # Set batch size
        batch_size = 200
        video_reader = cv2.VideoCapture(video_path)

        # Get total number of frames in the video
        total_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

        # Define possible formations
        possible_formations = ['4-3-3', '4-2-3-1', '4-3-2-1', '4-1-4-1', '3-5-2', '3-4-1-2', 
                               '4-4-2', '4-4-1-1', '5-4-1', '3-4-3', '4-1-2-1-2', '3-1-4-2', 
                               '3-4-2-1', '4-5-1', '4-3-1-2', '4-2-2-2', '3-5-1-1', '4-1-3-2', 
                               '5-3-2', '3-3-3-1', '4-2-4']
        i = 0
        
        # Loop through the video, processing batch_size frames at a time
        for start_frame in range(0, total_frames, batch_size):
            i += 1
            # Read a batch of frames
            video_frames = read_video_in_batches(video_reader, start_frame, batch_size)

            # If no frames were read, break the loop
            if len(video_frames) == 0:
                break

            # Process batch
            tracks = tracker.get_object_tracks(video_frames)  # Get object tracks for batch
            tracker.add_position_to_tracks(tracks)  # Add position to tracks

            # Camera movement estimator
            camera_movement_estimator = CameraMovementEstimator(video_frames[0])
            camera_movement_per_frame = camera_movement_estimator.get_camera_movement(video_frames)
            camera_movement_estimator.add_adjust_positions_to_tracks(tracks, camera_movement_per_frame)

            # Interpolate Ball Positions
            tracks["ball"] = tracker.interpolate_ball_positions(tracks["ball"])

            # View Transformer
            view_transformer = ViewTransformer()
            view_transformer.add_transformed_position_to_tracks(tracks)

            # Speed and Distance Estimation
            speed_and_distance_estimator = SpeedAndDistance_Estimator()
            df = speed_and_distance_estimator.update_df_with_speed_and_distance(tracks, df)

            # Team Assignment
            if i == 1:
                team_assigner.assign_team_color(video_frames[0], tracks['players'][0])
            
            for frame_num, player_track in enumerate(tracks['players']):
                for player_id, track in player_track.items():
                    team = team_assigner.get_player_team(video_frames[frame_num], track['bbox'], player_id)
                    tracks['players'][frame_num][player_id]['team'] = team
                    tracks['players'][frame_num][player_id]['team_color'] = team_assigner.team_colors[team]
                    
                    # Update DataFrame with team and team color
                    if player_id in df.index:
                        df.at[player_id, 'team'] = team
                        df.at[player_id, 'team_color'] = str(team_assigner.team_colors[team])
                    else:
                        df.loc[player_id] = {'team': team, 'team_color': str(team_assigner.team_colors[team])}

            # Ball Assignment
            player_assigner = PlayerBallAssigner()
            for frame_num, player_track in enumerate(tracks['players']):
                if frame_num < len(tracks['ball']):
                    ball_data_for_frame = tracks['ball'][frame_num]
                    if len(ball_data_for_frame) > 0 and 1 in ball_data_for_frame:
                        ball_bbox = ball_data_for_frame[1]['bbox']
                        assigned_player = player_assigner.assign_ball_to_player(player_track, ball_bbox)

                        if assigned_player != -1:
                            tracks['players'][frame_num][assigned_player]['has_ball'] = True

            # Pass Detection
            pass_detector = PassDetector(tracks, df)
            df = pass_detector.process_game_in_batches(batch_size=20)

            # YOLO Processor and Event Processing
            class_thresholds = {0: 0.8, 1: 0.7, 2: 0.3, 3: 0.1, 4: 0.7, 5: 0.6, 6: 0.85}
            yolo_processor = YOLOVideoProcessor('/kaggle/input/new_data.pt/pytorch/default/1/new_data.pt', class_thresholds)
            filtered_detections, detections_classes_2_and_3 = yolo_processor.process_frames_combined(video_frames)
            
            # Detect other events
            event_processor = EventProcessor(tracks, filtered_detections, df)
            df = event_processor.process_frames_in_batches()

            # Process Goal and Line Points
            processor = GoalAndLineProcessor()
            goals_and_lines_annotations = processor.get_goal_and_line_data(video_frames, detections_classes_2_and_3)

            # Detect shots, corners, saves, goals
            shot_detector = ShotDetector(tracks, df, team_df, goals_and_lines_annotations)
            df, team_df = shot_detector.process_frames_in_batches()

            # Initialize OCR
            player_number_tracker = PlayerShirtNumberTracker(video_frames, tracks, df, '/kaggle/input/ocr/pytorch/default/1/best (2).pt')
            df = player_number_tracker.run()

            # Initialize FormationDetector
            formation_detector = FormationDetector(tracks, possible_formations, team_df)
            team_df = formation_detector.process_frames_in_batches()

            # Initialize SubstitutionDetector
            detector = SubstitutionDetector(class_thresholds, '/kaggle/input/substitution_board/pytorch/default/1/best (3).pt', team_df)
            ocr_results, team_df = detector.extract_annotation(video_frames, filtered_detections, tracks)

            # Delete batch-specific objects and free up memory
            del video_frames, camera_movement_estimator, view_transformer, speed_and_distance_estimator
            del player_assigner, pass_detector, yolo_processor, event_processor, processor, shot_detector
            del filtered_detections, player_number_tracker, formation_detector, detector, ocr_results

            gc.collect()  # Force garbage collection

        # Fill in missing data
        df = df.fillna(0)

        # Final statistics processing
        player_stats = PlayerStats(df)
        team_1_df, team_2_df = player_stats.process_data()

        processor = SoccerMatchDataProcessorFullWithSubs(team_1_df, team_2_df, team_df)
        final_df = processor.process_match_data()

        # Save CSVs
        output_suffix = f"_video_{video_index + 1}"
        save_tracks_to_csv(tracks, csv_path=f'/kaggle/working/tracks_csv{output_suffix}.csv')
        df.to_csv(f'/kaggle/working/player_statistics{output_suffix}.csv', index=True)
        team_df.to_csv(f'/kaggle/working/team_statistics{output_suffix}.csv', index=True)
        team_1_df.to_csv(f'/kaggle/working/team_1_player_statistics{output_suffix}.csv', index=True)
        team_2_df.to_csv(f'/kaggle/working/team_2_player_statistics{output_suffix}.csv', index=True)
        final_df.to_csv(f'/kaggle/working/teams_final_statistics{output_suffix}.csv', index=True)

    """
                                  End of computer vision part
                                  Start of recommendation systems part
    """
    
    # Load the teams data
    teams1 = pd.read_csv('/kaggle/working/teams_final_statistics_video_1.csv')
    teams2 = pd.read_csv('/kaggle/working/teams_final_statistics_video_2.csv')

    # Combine teams1 and teams2 into a single DataFrame
    combined_teams = pd.concat([teams1, teams2], ignore_index=True)

    # Load the data
    mobile_data1 = pd.read_csv('/kaggle/input/mobile-data/mobile_data.csv')
    mobile_data2 = pd.read_csv('/kaggle/input/mobile-data/mobile_data_2.csv')

    correct_shirt_numbers = [str(num) for num in mobile_data1['Shirt_Number']]
    correct_shirt_numbers2 = [str(num) for num in mobile_data2['Shirt_Number']]

    player_data1 = pd.read_csv('/kaggle/working/team_1_player_statistics_video_1.csv')
    player_data2 = pd.read_csv('/kaggle/working/team_2_player_statistics_video_1.csv')
    player_data3 = pd.read_csv('/kaggle/working/team_1_player_statistics_video_2.csv')
    player_data4 = pd.read_csv('/kaggle/working/team_2_player_statistics_video_2.csv')

    player_data_dict = {
        'player_data1': player_data1,
        'player_data2': player_data2,
        'player_data3': player_data3,
        'player_data4': player_data4,
    }

    # Extract the first team's color from mobile_data1
    first_team_color_mobile1 = clean_team_color(mobile_data1.iloc[0]['Team_Color '])

    # Extract the opponent's team color from mobile_data2
    first_team_color_mobile2 = clean_team_color(mobile_data2.iloc[0]['Team_Color '])

    # Handle player data 1 and 2 based on mobile_data1
    closest_player_data_1_2 = []
    for player_data_key in ['player_data1', 'player_data2']:
        closest_player_dataset_key = find_closest_player_dataset(player_data_dict[player_data_key], first_team_color_mobile1, player_data_key)
        if closest_player_dataset_key:
            closest_player_data_1_2.append(player_data_dict[closest_player_dataset_key])

    # Combine the closest data for player 1 and 2 based on mobile_data1
    closest_player_data_mobile1 = pd.concat(closest_player_data_1_2, ignore_index=True)

    # Process the player stats
    player_stats = MyPlayerStats(closest_player_data_mobile1, correct_shirt_numbers, mobile_data1)
    closest_player_data_mobile1 = player_stats.process_data()

    # Correct the shirt numbers and drop the temporary column
    closest_player_data_mobile1['shirt_number'] = closest_player_data_mobile1['corrected_shirt_number']
    closest_player_data_mobile1.drop(columns=['corrected_shirt_number'], inplace=True)

    closest_player_data_mobile1.to_csv('/kaggle/working/closest_player_data_mobile1.csv', index=False)

    # Handle player data 3 and 4 based on mobile_data2
    closest_player_data_3_4 = []
    for player_data_key in ['player_data3', 'player_data4']:
        closest_player_dataset_key = find_closest_player_dataset(player_data_dict[player_data_key], first_team_color_mobile2, player_data_key)
        if closest_player_dataset_key:
            closest_player_data_3_4.append(player_data_dict[closest_player_dataset_key])

    # Combine the closest data for player 3 and 4 based on mobile_data2
    closest_player_data_mobile2 = pd.concat(closest_player_data_3_4, ignore_index=True)

    # Process the player stats
    player_stats = MyPlayerStats(closest_player_data_mobile2, correct_shirt_numbers2, mobile_data2)
    closest_player_data_mobile2 = player_stats.process_data()

    # Correct the shirt numbers and drop the temporary column
    closest_player_data_mobile2['shirt_number'] = closest_player_data_mobile2['corrected_shirt_number']
    closest_player_data_mobile2.drop(columns=['corrected_shirt_number'], inplace=True)

    closest_player_data_mobile2.to_csv('/kaggle/working/closest_player_data_mobile2.csv', index=False)

    # From here on, use only closest_player_data_mobile1 in the rest of the code

    # Find the closest matching rows in teams
    closest_row_team1 = find_closest_match(combined_teams, first_team_color_mobile1)
    opponent_team_color = clean_team_color(mobile_data2.iloc[0]['Team_Color '])
    closest_row_team2 = find_closest_match(combined_teams, opponent_team_color)

    combined_closest_rows = pd.DataFrame([closest_row_team1, closest_row_team2])

    my_team = pd.DataFrame([closest_row_team1])
    opponent_team = pd.DataFrame([closest_row_team2])
    my_team.to_csv('/kaggle/working/my_team.csv', index=False)
    opponent_team.to_csv('/kaggle/working/opponent_team.csv', index=False)

    # Run the models
    teams = combined_closest_rows
    data_cleaned = pd.read_csv('/kaggle/input/recommender-systems-data/data_cleaned.csv')

    # Use the closest player data based on mobile data 1
    player_data = closest_player_data_mobile1

    player_data['shirt_number'] = player_data.pop('Shirt_Number')
    player_data['pass_success'] = player_data.pop('%_pass_success')
    player_data['dribbles_success'] = player_data.pop('%_dribbles_success')
    player_data['aerial_success'] = player_data.pop('%_aerial_success')
    player_data['tackles_success'] = player_data.pop('%_tackles_success')

    # Initialize the first model
    model1 = FirstModel(teams, data_cleaned)

    # Find similar rows based on the first model
    similar_rows = model1.find_similar_rows()

    # Save recommended formations to CSV
    recommended_formations = model1.find_winning_rows(similar_rows)
    recommended_formations.to_csv('/kaggle/working/recommended_formations.csv', index=False)

    # Select the first match data row
    match_data = recommended_formations
    input_row = match_data.iloc[0].to_dict()
    input_row['tackles_success'] = input_row.pop('tackle_success')

    # Initialize the second model to recommend a team based on input row and processed player data
    team_recommender = SecondModel(input_row, player_data)
    selected_players, team_stats = team_recommender.recommend_team()

    # If a team was successfully selected, display the results
    if selected_players is not None:
        selected_players['status'] = 'Starting 11'  # Add a column to indicate starting players

        # Remove the selected players and recommend substitutes
        player_shirt_number_to_remove = selected_players['shirt_number'].tolist()
        player_data_updated = player_data[~player_data['shirt_number'].isin(player_shirt_number_to_remove)]

        substitute_recommender = SecondModel(input_row, player_data_updated)
        selected_substitutes, team_stats = substitute_recommender.recommend_team()

        # If substitutes were successfully selected, display the results
        if selected_substitutes is not None:
            selected_substitutes['status'] = 'Substitute'  # Add a column to indicate substitutes

            # Combine both selected players and substitutes into a single file
            combined_team = pd.concat([selected_players, selected_substitutes], ignore_index=True)
            combined_team.to_csv('/kaggle/working/combined_team.csv', index=False)
        
        else:
            selected_players.to_csv('/kaggle/working/combined_team.csv', index=False)

    """
                                  End of recommendation systems part
                                  Start of LLM part
    """

    # Set the environment variable in the current notebook session
    os.environ["GEMINI_API_KEY"] = "AIzaSyBv4nX97Do78jNAM0Kl5_DFE96qWsBfgbM"

    opponent_info = pd.read_csv(r'/kaggle/working/opponent_team.csv')
    opponent_info_str = opponent_info.to_string(index=False)

    opponent_players = pd.read_csv(r'/kaggle/working/closest_player_data_mobile2.csv')
    opponent_players_str = opponent_players.to_string(index=False)

    my_team_info = pd.read_csv(r'/kaggle/working/my_team.csv')
    my_team_info_str = my_team_info.to_string(index=False)

    my_team_players = pd.read_csv(r'/kaggle/working/closest_player_data_mobile1.csv')
    my_team_players_str = my_team_players.to_string(index=False)

    best_formations = pd.read_csv(r'/kaggle/working/recommended_formations.csv')
    best_formations_str = best_formations.to_string(index=False)

    match_players_recommendations = pd.read_csv(r'/kaggle/working/combined_team.csv')
    match_players_recommendations_str = match_players_recommendations.to_string(index=False)

    # Configure the Gemini API key
    genai.configure(api_key=os.environ["GEMINI_API_KEY"])

    # Generate the match summary prompt
    match_summary_prompt = generate_match_summary_prompt(my_team_info_str, opponent_info_str)
    match_summary_json = send_to_gemini_api_with_retry(match_summary_prompt)

    if match_summary_json:
        print("Match Summary Result:")
        print(json.dumps(match_summary_json, indent=4))
        json_outputs["match_summary"] = match_summary_json
    else:
        print("Failed to retrieve valid JSON for match summary.")
        json_outputs["match_summary"] = "Failed to retrieve valid JSON for match summary."

    # Generate the suggestions prompt
    recommendation_prompt = generate_player_suggestions_prompt(best_formations_str, match_players_recommendations_str)
    recommendation_json = send_to_gemini_api_with_retry(recommendation_prompt)

    if recommendation_json:
        print("Recommendation Result:")
        print(json.dumps(recommendation_json, indent=4))
        json_outputs["recommendations"] = recommendation_json
    else:
        print("Failed to retrieve valid JSON for recommendations.")
        json_outputs["recommendations"] = "Failed to retrieve valid JSON for recommendations."

    # Generate the opponent analysis prompt
    opponent_analysis_prompt = generate_opponent_analysis_prompt(opponent_info_str, opponent_players_str)
    opponent_analysis_json = send_to_gemini_api_with_retry(opponent_analysis_prompt)

    if opponent_analysis_json:
        print("Opponent Analysis Result:")
        print(json.dumps(opponent_analysis_json, indent=4))
        json_outputs["opponent_analysis"] = opponent_analysis_json
    else:
        print("Failed to retrieve valid JSON for opponent analysis.")
        json_outputs["opponent_analysis"] = "Failed to retrieve valid JSON for opponent analysis."

    # Generate the training suggestions prompt
    training_suggestions_prompt = generate_training_suggestions_prompt(my_team_players_str, my_team_info_str, opponent_analysis_json)
    training_suggestions_json = send_to_gemini_api_with_retry(training_suggestions_prompt)

    if training_suggestions_json:
        # Prepare the output structure
        output = {
            "team_training_session": training_suggestions_json.get("team_training_session", ""),
            "worst_5_players_individual_sessions": training_suggestions_json.get("individual_sessions", {})[:4] 
        }
        print("Training Suggestions Result:")
        print(json.dumps(output, indent=4))
        json_outputs["training_suggestions"] = output
    else:
        print("Failed to retrieve valid JSON for training suggestions.")
        json_outputs["training_suggestions"] = "Failed to retrieve valid JSON for training suggestions."

    # Return the final JSON outputs
    return json_outputs

@app.post("/process_videos")
def upload_video_paths(video_data: VideoPaths):
    video_paths = video_data.video_paths
    
    if len(video_paths) != 2:
        raise HTTPException(status_code=400, detail="Please provide exactly 2 video paths.")
    
    # Process the videos (this will call the process_videos function)
    output = process_videos(video_paths)
    
    return {"message": "Videos are being processed", "video_paths": video_paths}

@app.get("/get_json_outputs")
def get_json_outputs():
    if not json_outputs:
        raise HTTPException(status_code=404, detail="No JSON outputs available yet. Videos might still be processing.")
    
    return json_outputs

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)



SystemExit: 1

