# **Dataset Preparation**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
from PIL import Image
import os
from sklearn import preprocessing

# Define a custom Dataset class to load images and labels
class CricketDataset(Dataset):
    """Image-folder dataset for the two bowling phases.

    ``root_dir`` must contain one sub-folder per class, named exactly
    'Bowl Started' (label 0) and 'Bowl Starting' (label 1).  Every image file
    found directly inside those folders becomes one sample.

    Args:
        root_dir: Directory holding the two class sub-folders.
        transform: Optional callable applied to the RGB PIL image before it
            is returned from ``__getitem__``.
    """

    # Sub-folder names; the position in this tuple is the integer label.
    CLASS_NAMES = ('Bowl Started', 'Bowl Starting')
    # Accepted file suffixes, compared case-insensitively.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        for label_idx, class_name in enumerate(self.CLASS_NAMES):
            class_folder = os.path.join(root_dir, class_name)
            # os.listdir() returns entries in arbitrary order; sorting keeps
            # sample indices stable between runs and machines.
            for img_name in sorted(os.listdir(class_folder)):
                # Lower-case first so 'IMG.JPG' / 'frame.PNG' are not dropped.
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.image_paths.append(os.path.join(class_folder, img_name))
                    self.labels.append(label_idx)

    def __len__(self):
        """Return the number of image files that were indexed."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded as RGB and then passed through ``self.transform``
        when one was supplied; any grayscale conversion or resizing happens
        inside that transform, not here.
        """
        img_path = self.image_paths[idx]
        # The context manager closes the file handle once the pixels have
        # been copied into the converted image.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

# Preprocessing applied to every split: single-channel grayscale, 224x224,
# tensor conversion, then normalisation with mean 0.5 / std 0.5.
_preprocessing_steps = [
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
]
transform = transforms.Compose(_preprocessing_steps)

# One dataset per split, built in train / valid / test order.
_SPLIT_DIRS = (
    '/kaggle/input/cricket-bowling/YoloDataset - Copy/train',
    '/kaggle/input/cricket-bowling/YoloDataset - Copy/valid',
    '/kaggle/input/cricket-bowling/YoloDataset - Copy/test',
)
train_dataset, valid_dataset, test_dataset = [
    CricketDataset(root_dir=split_dir, transform=transform)
    for split_dir in _SPLIT_DIRS
]

# Only the training loader shuffles; validation and test keep file order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader, test_loader = [
    DataLoader(split_dataset, batch_size=32, shuffle=False)
    for split_dataset in (valid_dataset, test_dataset)
]

# Load EfficientNet-B0 with ImageNet weights.  `weights=` replaces the
# deprecated `pretrained=True` flag (torchvision >= 0.13); IMAGENET1K_V1 is
# the checkpoint the old flag selected.
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

# The stock network expects 3-channel input, so features[0] is swapped for a
# 1-channel convolution.  This is a freshly initialised nn.Conv2d, so its
# weights start random rather than pretrained.
# NOTE(review): the inference cells rebuild exactly this layer before calling
# load_state_dict, so any change here must be mirrored there.
model.features[0] = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))

# Replace the classifier head with a 2-way output (Bowl Started, Bowl Starting)
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Cross-entropy over the two classes, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)





# **Model Training**

In [None]:
# Function for training the model
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Make one pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    When ``optimizer`` is given the model is updated after every batch;
    otherwise the batches are only scored.  The loss is weighted by batch
    size so a short final batch does not skew the mean (this assumes
    ``criterion`` returns the batch mean, as nn.CrossEntropyLoss does by
    default).  An empty loader yields ``(nan, 0.0)`` instead of dividing by
    zero.
    """
    loss_sum = 0.0
    correct = 0
    seen = 0

    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        if optimizer is not None:
            optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        if optimizer is not None:
            loss.backward()
            optimizer.step()

        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size

        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        seen += batch_size

    if seen == 0:
        return float('nan'), 0.0
    return loss_sum / seen, 100 * correct / seen


def train_model(model, train_loader, valid_loader, criterion, optimizer, num_epochs=5,
                checkpoint_path="best_model.pth"):
    """Train ``model`` and checkpoint the weights with the lowest validation loss.

    Args:
        model: Network to train; batches are moved to the device its
            parameters already live on.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        valid_loader: Iterable of ``(inputs, labels)`` validation batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer bound to ``model``'s parameters.
        num_epochs: Number of passes over ``train_loader``.
        checkpoint_path: Where the best ``state_dict`` is written.

    Returns:
        A list with one dict per epoch containing ``epoch``, ``train_loss``,
        ``train_accuracy``, ``valid_loss`` and ``valid_accuracy``.
    """
    # Follow the model's own device instead of relying on a notebook global.
    run_device = next(model.parameters()).device
    best_valid_loss = float('inf')
    history = []

    for epoch in range(num_epochs):
        model.train()
        epoch_loss, epoch_accuracy = _run_epoch(
            model, train_loader, criterion, run_device, optimizer
        )

        # Validation phase: no gradient tracking, no parameter updates
        model.eval()
        with torch.no_grad():
            valid_loss, valid_accuracy = _run_epoch(
                model, valid_loader, criterion, run_device
            )

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.2f}%")
        print(f"Validation Loss: {valid_loss:.4f}, Validation Accuracy: {valid_accuracy:.2f}%")

        history.append({
            "epoch": epoch + 1,
            "train_loss": epoch_loss,
            "train_accuracy": epoch_accuracy,
            "valid_loss": valid_loss,
            "valid_accuracy": valid_accuracy,
        })

        # A NaN loss (empty validation set) never compares as smaller, so it
        # cannot overwrite a real checkpoint.
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), checkpoint_path)
            print("Saved best model")

    return history

# Run the full 30-epoch training with the loaders, loss and optimizer defined above
train_model(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=30,
)


# **Model Evaluation**

In [None]:
# Evaluate the model on the test set
def evaluate_model(model, test_loader):
    """Print and return the top-1 accuracy of ``model`` on ``test_loader``.

    Args:
        model: Trained network; batches are moved to the device its
            parameters live on.
        test_loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        Accuracy in percent as a float, or ``None`` when the loader yields
        no samples (accuracy is undefined in that case).
    """
    # Follow the model's own device instead of relying on a notebook global.
    run_device = next(model.parameters()).device
    model.eval()
    correct_preds = 0
    total_preds = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(run_device), labels.to(run_device)

            outputs = model(inputs)

            _, predicted = torch.max(outputs, 1)
            correct_preds += (predicted == labels).sum().item()
            total_preds += labels.size(0)

    if total_preds == 0:
        # Avoid a ZeroDivisionError on an empty test set.
        print("Test set is empty; accuracy is undefined.")
        return None

    test_accuracy = 100 * correct_preds / total_preds
    print(f"Test Accuracy: {test_accuracy:.2f}%")
    return test_accuracy

# Report accuracy on the held-out test split
evaluate_model(model=model, test_loader=test_loader)

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import models
import cv2
import os
from PIL import Image

# Rebuild the training architecture, then load the fine-tuned checkpoint.
model_path = "/kaggle/input/efficentnetb0/pytorch/default/1/best_model (1).pth"
# `weights=None` replaces the deprecated `pretrained=False` flag; no ImageNet
# weights are needed because the checkpoint overwrites every parameter.
model = models.efficientnet_b0(weights=None)
# These two replacements must match the layers used at training time so the
# checkpoint keys and shapes line up.
model.features[0] = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# The file holds a plain state_dict, so restrict unpickling to tensors;
# this also silences the torch.load FutureWarning.
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
model.eval()  # Set the model to evaluation mode

# Same preprocessing as training: grayscale, 224x224, tensor, normalise
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
    transforms.Resize((224, 224)),                # Resize to 224x224
    transforms.ToTensor(),                        # Convert to tensor
    transforms.Normalize(mean=[0.5], std=[0.5])   # Normalize the image
])

# Function to process video and make predictions
def process_video(video_path, model):
    """Classify every frame of a video and tag each prediction with its time.

    Uses the notebook-level ``transform`` and ``device`` to preprocess each
    frame and place it on the right device.

    Args:
        video_path: Path to a video file readable by OpenCV.
        model: Classifier returning one score per class for a batch of frames.

    Returns:
        A list of ``(predicted_class_index, 'HH:MM:SS')`` tuples, one per frame.

    Raises:
        IOError: If the video cannot be opened.
        ValueError: If the container reports a non-positive frame rate, which
            would make timestamps impossible to compute.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    results = []  # (prediction, timestamp) per frame
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps or fps <= 0:
            raise ValueError(f"Video reports an invalid frame rate ({fps}): {video_path}")

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV delivers BGR; PIL and the transform expect RGB
            frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            frame_tensor = transform(frame_pil).unsqueeze(0).to(device)

            with torch.no_grad():
                output = model(frame_tensor)
                _, predicted = torch.max(output, 1)

            # Whole seconds since the start, split into hh:mm:ss
            total_seconds = int(frame_count / fps)
            hours, remainder = divmod(total_seconds, 3600)
            minutes, seconds = divmod(remainder, 60)
            time_hhmmss = f"{hours:02}:{minutes:02}:{seconds:02}"

            results.append((predicted.item(), time_hhmmss))
            frame_count += 1
    finally:
        # Release the capture even if inference raises part-way through
        cap.release()

    return results

# Video to classify frame by frame
video_path = "/kaggle/input/cricket-shaheen-bowling/shaheen.mp4"

# One (class index, timestamp) pair per frame
predictions = process_video(video_path, model)

# Label 0 is "Bowl Started"; anything else is reported as "Bowl Starting"
for pred, timestamp in predictions:
    if pred == 0:
        class_name = "Bowl Started"
    else:
        class_name = "Bowl Starting"
    print(f"Timestamp: {timestamp}, Prediction: {class_name}")


In [21]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import models
import cv2
from PIL import Image
import easyocr
import re
from collections import deque, Counter

# Rebuild the training architecture, then load the fine-tuned checkpoint.
model_path = "/kaggle/input/efficentnetb0/pytorch/default/1/best_model (1).pth"
# `weights=None` replaces the deprecated `pretrained=False` flag.
model = models.efficientnet_b0(weights=None)
# Layer replacements must match training so the checkpoint keys line up.
model.features[0] = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
model.classifier[1] = nn.Linear(model.classifier[1].in_features, 2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# The file holds a plain state_dict, so restrict unpickling to tensors;
# this also silences the torch.load FutureWarning.
model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
model.eval()

# Same preprocessing as training: grayscale, 224x224, tensor, normalise
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

def extract_cricket_score_from_frame(frame, reader):
    """OCR the lower-left part of a frame and parse a 'runs-wickets' score.

    Args:
        frame: BGR image array (it is converted with COLOR_BGR2GRAY).
        reader: Object exposing ``readtext(image, detail=0)`` that returns a
            list of recognised strings (an easyocr.Reader in this notebook).

    Returns:
        ``(runs, wickets)`` as ints for the first ``<digits>-<digits>`` match
        in the recognised text, or ``(None, None)`` when nothing matches.
    """
    frame_h, frame_w = frame.shape[:2]

    # Crop window: rows 76%..98.6% of the height, columns 0..47% of the width
    top, bottom = int(0.76 * frame_h), int(0.986 * frame_h)
    right = int(0.47 * frame_w)
    scoreboard = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)[top:bottom, 0:right]

    # Inverted binary threshold at 150 before handing the crop to OCR
    inverted = cv2.threshold(scoreboard, 150, 255, cv2.THRESH_BINARY_INV)[1]
    recognised = reader.readtext(inverted, detail=0)

    found = re.search(r'\b(\d+)-(\d+)\b', ' '.join(recognised))
    if found is None:
        return None, None
    runs_text, wickets_text = found.groups()
    return int(runs_text), int(wickets_text)

def has_consecutive_repeats(seq, value, min_repeat):
    """Return True if ``value`` occurs at least ``min_repeat`` times in a row.

    The run length is only checked on elements equal to ``value``, so a
    sequence that never contains ``value`` always yields False.
    """
    streak = 0
    for element in seq:
        if not (element == value):
            # Any other element breaks the current run
            streak = 0
            continue
        streak += 1
        if streak >= min_repeat:
            return True
    return False

def get_stable_score(score_window):
    """Pick a score from the recent OCR readings that looks trustworthy.

    Candidates are visited in first-seen order.  A candidate is returned when
    it appears at least 3 times and either shows up 3 times in a row or is
    (one of) the most frequent readings.  Returns None when no reading
    qualifies.
    """
    tally = Counter(score_window)
    for candidate, occurrences in tally.items():
        if occurrences < 3:
            continue
        if has_consecutive_repeats(score_window, candidate, min_repeat=3):
            return candidate
        if occurrences == max(tally.values()):
            return candidate
    return None

def _frame_to_hhmmss(frame_index, fps):
    """Convert a frame index to an 'HH:MM:SS' string (fractional seconds dropped)."""
    total_seconds = int(frame_index / fps)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02}:{minutes:02}:{seconds:02}"


def process_video(video_path, frame_skip=10):
    """Scan a match video for delivery starts and scoreboard jumps.

    Two detectors run over the same frames, using the notebook-level
    ``model``, ``transform`` and ``device``:

    * EfficientNet: when the last 20 predictions hold at least 15 ones,
      including a run of 9 or more, the frame time is recorded and frames
      are skipped for the next ``10 * fps`` frames.
    * OCR: on frames whose index is a multiple of ``frame_skip`` the score is
      read; when the stable score rises by 4 or 6 runs, or by one wicket,
      the event is recorded.

    Args:
        video_path: Path to a video file readable by OpenCV.
        frame_skip: Run OCR on every ``frame_skip``-th frame index (>= 1).

    Returns:
        ``(timestamps_effnet, timestamps_ocr)`` where ``timestamps_effnet`` is
        a list of 'HH:MM:SS' strings and ``timestamps_ocr`` is a list of
        ``(runs, wickets, previous_score, 'HH:MM:SS')`` tuples.

    Raises:
        IOError: If the video cannot be opened.
        ValueError: If ``frame_skip`` < 1 or the reported frame rate is not
            positive.
    """
    if frame_skip < 1:
        raise ValueError(f"frame_skip must be >= 1, got {frame_skip}")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    timestamps_effnet = []
    timestamps_ocr = []

    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps or fps <= 0:
            raise ValueError(f"Video reports an invalid frame rate ({fps}): {video_path}")
        reader = easyocr.Reader(['en'])

        frame_count = 0
        frame_window = deque(maxlen=20)      # last 20 classifier predictions
        pause_counter = 0                    # frames still to skip after a detection
        last_score = None                    # most recent confirmed (runs, wickets)
        ocr_score_window = deque(maxlen=15)  # last 15 successful OCR readings

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Cool-down after a detection: skip both the classifier and OCR
            if pause_counter > 0:
                pause_counter -= 1
                frame_count += 1
                continue

            img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            img_tensor = transform(img).unsqueeze(0).to(device)

            with torch.no_grad():
                outputs = model(img_tensor)
                _, predicted = torch.max(outputs, 1)
                prediction = predicted.item()

            frame_window.append(prediction)

            # Mostly class 1 overall AND a solid run of 9 in a row
            if frame_window.count(1) >= 15 and has_consecutive_repeats(frame_window, 1, min_repeat=9):
                timestamps_effnet.append(_frame_to_hhmmss(frame_count, fps))
                pause_counter = int(10 * fps)
                frame_window.clear()

            if frame_count % frame_skip == 0:
                runs, wickets = extract_cricket_score_from_frame(frame, reader)
                if runs is not None and wickets is not None:
                    ocr_score_window.append((runs, wickets))

                stable_score = get_stable_score(list(ocr_score_window))
                if stable_score is not None:
                    confirmed_runs, confirmed_wickets = stable_score
                    # Keep the old score before overwriting it so the event
                    # records what the score changed *from*.
                    previous_score = last_score
                    last_score = (confirmed_runs, confirmed_wickets)

                    if previous_score is not None:
                        run_diff = confirmed_runs - previous_score[0]
                        wicket_diff = confirmed_wickets - previous_score[1]
                        # Boundary (4 or 6) or a single wicket counts as a highlight
                        if run_diff in (4, 6) or wicket_diff == 1:
                            timestamps_ocr.append((
                                confirmed_runs,
                                confirmed_wickets,
                                previous_score,
                                _frame_to_hhmmss(frame_count, fps),
                            ))

            frame_count += 1
    finally:
        # Release the capture even if inference or OCR raises part-way through
        cap.release()

    return timestamps_effnet, timestamps_ocr

# Run both detectors over the full match
video_path = "/kaggle/input/fullt20-pakvnz/Full Lenght Match.mp4"
timestamps_effnet, timestamps_ocr = process_video(video_path)

# Delivery-start times from the classifier
print("EfficientNet Timestamps:", timestamps_effnet, sep="\n")

# Scoreboard events from OCR, one per line
print("\nOCR Timestamps and Scores:")
for runs, wickets, last_score, timestamp in timestamps_ocr:
    print(f"Runs: {runs}, Wickets: {wickets}, Last Score: {last_score}, Timestamp: {timestamp}")


  model.load_state_dict(torch.load(model_path, map_location=device))


EfficientNet Timestamps:
['00:02:34', '00:02:45', '00:03:20', '00:04:15', '00:04:48', '00:05:11', '00:05:37', '00:06:49', '00:07:28', '00:08:11', '00:08:53', '00:09:16', '00:09:31', '00:10:12', '00:11:02', '00:11:17', '00:11:49', '00:12:21', '00:12:39', '00:13:00', '00:13:36', '00:14:11', '00:15:13', '00:15:46', '00:16:37', '00:17:30', '00:18:06', '00:18:42', '00:19:59', '00:20:33', '00:20:51', '00:21:02', '00:21:12', '00:22:01', '00:22:12', '00:22:56', '00:23:35', '00:24:09', '00:24:51', '00:25:54', '00:26:29', '00:27:04', '00:27:45', '00:28:16', '00:30:37', '00:31:11', '00:31:23', '00:31:43', '00:32:26', '00:32:38', '00:33:05', '00:33:37', '00:34:10', '00:34:30', '00:36:23', '00:36:59', '00:37:20', '00:37:30', '00:37:59', '00:38:21', '00:38:35', '00:38:45', '00:39:47', '00:40:14', '00:40:31', '00:41:02', '00:41:35', '00:42:04', '00:42:56', '00:43:11', '00:43:28', '00:43:41', '00:44:05', '00:46:03', '00:46:48', '00:47:00', '00:47:53', '00:48:14', '00:48:31', '00:49:01', '00:49:20', '0

In [None]:
# import torch
# import torch.nn as nn
# import torchvision.transforms as transforms
# from torchvision import models
# import cv2
# from PIL import Image
# import easyocr
# import re
# from collections import deque, Counter


# def extract_cricket_score_from_frame(frame, reader):
#     height, width = frame.shape[:2]
#     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#     y_start = int(0.76 * height)
#     y_end = int(0.986 * height)
#     x_start = 0
#     x_end = int(0.47 * width)
#     region = gray[y_start:y_end, x_start:x_end]
#     _, binary = cv2.threshold(region, 150, 255, cv2.THRESH_BINARY_INV)
#     result = reader.readtext(binary, detail=0)
#     ocr_text = ' '.join(result)
#     match = re.search(r'\b(\d+)-(\d+)\b', ocr_text)
#     if match:
#         return int(match.group(1)), int(match.group(2))
#     return None, None

# def has_consecutive_repeats(seq, value, min_repeat):
#     repeat = 0
#     for item in seq:
#         if item == value:
#             repeat += 1
#             if repeat >= min_repeat:
#                 return True
#         else:
#             repeat = 0
#     return False

# def get_stable_score(score_window):
#     counts = Counter(score_window)
#     for score, count in counts.items():
#         if count >= 2:
#             if has_consecutive_repeats(score_window, score, min_repeat=3) or count == max(counts.values()):
#                 return score
#     return None
# def process_video(video_path, frame_skip=15, start_time_sec=0):
#     cap = cv2.VideoCapture(video_path)
#     reader = easyocr.Reader(['en'])
#     fps = cap.get(cv2.CAP_PROP_FPS)
#     frame_count = 0
#     last_score = None
#     score_window = deque(maxlen=15)

#     # Skip to the requested start frame (start_time_sec converted to a frame index)
#     start_frame = int(start_time_sec * fps)
#     cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
#     frame_count = start_frame

#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             break

#         runs, wickets = extract_cricket_score_from_frame(frame, reader)
#         if runs is not None and wickets is not None:
#             score_window.append((runs, wickets))

#         stable_score = get_stable_score(list(score_window))

#         timestamp_sec = frame_count / fps
#         minutes = int(timestamp_sec // 60)
#         seconds = int(timestamp_sec % 60)
#         hours = int(minutes // 60)
#         minutes = minutes % 60
#         timestamp = f"{hours:02}:{minutes:02}:{seconds:02}"
#         wicket_diff = 0
#         run_diff = 0
#         if stable_score:
#             confirmed_runs, confirmed_wickets = stable_score
#             if last_score is None:
#                 last_score = (confirmed_runs, confirmed_wickets)
#                 should_add = False
#             else:
#                 run_diff = confirmed_runs - last_score[0]
#                 wicket_diff = confirmed_wickets - last_score[1]
#                 valid_run = run_diff in [4, 6]
#                 valid_wicket = wicket_diff == 1
#                 should_add = valid_run or valid_wicket
#                 last_score = (confirmed_runs, confirmed_wickets)
#             if should_add:
#                 print(f"✅ Time {timestamp}: Score = {confirmed_runs}-{confirmed_wickets} , Last Score: {last_score} , Wicket: {wicket_diff} , run_diff {run_diff} (Highlight)")
#             else:
#                 print(f"❌ Time {timestamp}: Score = {confirmed_runs}-{confirmed_wickets}, Last Score: {last_score} , Wicket: {wicket_diff} , run_diff {run_diff} (Not a highlight)")
        
#         else:
#             print(f"⏳ Time {timestamp}: Score unstable or not detected")

#         for _ in range(frame_skip - 1):
#             cap.read()
#             frame_count += 1

#         frame_count += 1

#     cap.release()


# # Example usage
# video_path = "/kaggle/input/50-se-56-wali-video/50 se 56 wali video - Made with Clipchamp (1).mp4"
# process_video(video_path)


In [22]:
import cv2

# Report the frame dimensions of the 30-minute test clip
file = "/kaggle/input/30minvideo/Untitled video - Made with Clipchamp (1).mp4"

cap = cv2.VideoCapture(file)

if cap.isOpened():
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(f"Frame size: {width} x {height}")
else:
    print("Error opening video file")

# Released in both cases
cap.release()


Frame size: 852 x 480


In [23]:
# EfficientNet Timestamps:
# ['00:00:00', '00:00:36', '00:01:09', '00:01:52', '00:02:55', '00:03:30', '00:04:05', '00:04:45', '00:05:16', '00:07:37', '00:08:11', '00:08:23', '00:08:43', '00:09:26', '00:09:39', '00:10:05', '00:10:37', '00:11:10', '00:11:30', '00:13:23', '00:13:59', '00:14:20', '00:14:31', '00:14:59', '00:15:21', '00:15:35', '00:15:46', '00:16:47', '00:17:14', '00:17:31', '00:18:02', '00:18:36', '00:19:05', '00:19:56', '00:20:11', '00:20:28', '00:20:41', '00:21:06', '00:23:04', '00:23:49', '00:24:01', '00:24:54', '00:25:14', '00:25:32', '00:26:01', '00:26:20', '00:26:43', '00:27:04', '00:28:03', '00:28:27', '00:28:47', '00:29:10', '00:29:24', '00:29:49']

# OCR Timestamps and Scores:
# Runs: 41, Wickets: 0, Timestamp: 00:00:11
# Runs: 46, Wickets: 0, Timestamp: 00:01:20
# Runs: 50, Wickets: 0, Timestamp: 00:04:16
# Runs: 50, Wickets: 1, Timestamp: 00:05:33
# Runs: 55, Wickets: 1, Timestamp: 00:10:55
# Runs: 56, Wickets: 2, Timestamp: 00:12:23
# Runs: 60, Wickets: 2, Timestamp: 00:14:43
# Runs: 65, Wickets: 2, Timestamp: 00:18:48
# Runs: 67, Wickets: 3, Timestamp: 00:21:24
# Runs: 72, Wickets: 3, Timestamp: 00:27:19
# Runs: 76, Wickets: 3, Timestamp: 00:29:35

In [24]:
# Header line followed by the number of OCR events found
print("\nOCR Timestamps and Scores:", len(timestamps_ocr), sep="\n")


OCR Timestamps and Scores:
42


# **Concatenating Timestamps**

In [25]:
from datetime import datetime
from datetime import datetime, timedelta
def find_closest_timestamps(ocr_list, effnet_list):
    """Pair each OCR event with the latest EfficientNet time at or before it.

    Args:
        ocr_list: Iterable of 4-tuples whose last element is an 'HH:MM:SS'
            string; the first three elements are ignored.
        effnet_list: Iterable of 'HH:MM:SS' strings.

    Returns:
        A list of ``(effnet_timestamp, ocr_timestamp)`` string pairs in the
        order of ``ocr_list``.  OCR events with no EfficientNet time at or
        before them are skipped.
    """
    time_format = "%H:%M:%S"
    effnet_moments = [datetime.strptime(stamp, time_format) for stamp in effnet_list]

    pairs = []
    for _runs, _wickets, _previous, ocr_stamp in ocr_list:
        ocr_moment = datetime.strptime(ocr_stamp, time_format)

        # Track the latest candidate that does not come after the OCR event
        latest_before = None
        for candidate in effnet_moments:
            if candidate > ocr_moment:
                continue
            if latest_before is None or candidate > latest_before:
                latest_before = candidate

        if latest_before is not None:
            pairs.append((latest_before.strftime(time_format), ocr_stamp))

    return pairs
def add_seconds_to_time(time_str, seconds_to_add):
    """Shift an 'HH:MM:SS' string by ``seconds_to_add`` seconds.

    The arithmetic goes through ``datetime``, so only the time-of-day part is
    formatted back: a result past midnight wraps around to '00:..'.
    """
    time_format = '%H:%M:%S'
    shifted = datetime.strptime(time_str, time_format) + timedelta(seconds=seconds_to_add)
    return shifted.strftime(time_format)



# Each clip starts at the matched EfficientNet time and ends 5 seconds after
# the OCR event it was paired with.
new_list = [
    (clip_start, add_seconds_to_time(ocr_time, 5))
    for clip_start, ocr_time in find_closest_timestamps(timestamps_ocr, timestamps_effnet)
]
print(new_list)


[('00:02:45', '00:03:23'), ('00:07:28', '00:07:48'), ('00:08:11', '00:08:30'), ('00:13:36', '00:13:56'), ('00:15:46', '00:16:05'), ('00:24:09', '00:24:28'), ('00:28:16', '00:28:41'), ('00:34:30', '00:35:31'), ('00:44:05', '00:44:32'), ('00:54:53', '00:55:12'), ('00:55:22', '00:56:17'), ('01:06:02', '01:06:23'), ('01:07:32', '01:07:51'), ('01:08:46', '01:09:06'), ('01:13:59', '01:14:18'), ('01:14:38', '01:14:57'), ('01:15:16', '01:15:36'), ('01:16:22', '01:16:47'), ('01:23:01', '01:23:20'), ('01:24:39', '01:24:58'), ('01:27:14', '01:27:33'), ('01:27:53', '01:28:18'), ('01:29:58', '01:30:46'), ('01:33:22', '01:33:58'), ('01:33:54', '01:34:13'), ('01:37:28', '01:37:54'), ('01:39:25', '01:39:47'), ('01:41:38', '01:42:00'), ('01:48:45', '01:49:10'), ('01:50:49', '01:51:16'), ('01:54:29', '01:54:48'), ('02:04:16', '02:04:40'), ('02:13:22', '02:13:45'), ('02:20:39', '02:20:58'), ('02:24:29', '02:24:52'), ('02:34:38', '02:35:02'), ('02:35:29', '02:36:01'), ('02:37:27', '02:37:46'), ('02:38:46'

# **Clips Extraction**

In [30]:
import os
import subprocess

def extract_clips_with_ffmpeg(video_path, clip_timestamps, output_folder="/kaggle/working/new2"):
    """Cut one stream-copied clip per ``(start, end)`` pair with ffmpeg.

    Clips are written as ``clip2_<n>.mp4`` (n starting at 1) inside
    ``output_folder``, which is created when missing.  A failing ffmpeg run
    is reported and skipped instead of being announced as a success.

    Args:
        video_path: Source video passed to ffmpeg's ``-i``.
        clip_timestamps: Iterable of ``(start, end)`` strings understood by
            ffmpeg's ``-ss`` / ``-to`` (here 'HH:MM:SS').
        output_folder: Directory that receives the clips.

    Returns:
        List of output file paths for the clips ffmpeg wrote successfully.
    """
    os.makedirs(output_folder, exist_ok=True)

    written = []
    for clip_number, (start, end) in enumerate(clip_timestamps, start=1):
        output_file = os.path.join(output_folder, f"clip2_{clip_number}.mp4")

        cmd = [
            'ffmpeg',
            '-nostdin',    # never wait for keyboard input inside a notebook
            '-y',          # overwrite clips left over from a previous run
            '-i', video_path,
            '-ss', start,  # Start time
            '-to', end,    # End time
            '-c', 'copy',  # Stream copy, no re-encoding
            output_file,
        ]

        completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if completed.returncode != 0:
            # Show the last stderr line so the failure can be diagnosed
            last_line = completed.stderr.decode(errors='replace').strip().splitlines()[-1:]
            print(f"Failed to extract clip {clip_number}: {start} to {end} ({' '.join(last_line)})")
            continue

        written.append(output_file)
        print(f"Extracted clip {clip_number}: {start} to {end}")

    return written

# Cut the highlight clips out of the full match
video_path = "/kaggle/input/fullt20-pakvnz/Full Lenght Match.mp4"
extract_clips_with_ffmpeg(video_path=video_path, clip_timestamps=new_list)


Extracted clip 1: 00:02:45 to 00:03:23
Extracted clip 2: 00:07:28 to 00:07:48
Extracted clip 3: 00:08:11 to 00:08:30
Extracted clip 4: 00:13:36 to 00:13:56
Extracted clip 5: 00:15:46 to 00:16:05
Extracted clip 6: 00:24:09 to 00:24:28
Extracted clip 7: 00:28:16 to 00:28:41
Extracted clip 8: 00:34:30 to 00:35:31
Extracted clip 9: 00:44:05 to 00:44:32
Extracted clip 10: 00:54:53 to 00:55:12
Extracted clip 11: 00:55:22 to 00:56:17
Extracted clip 12: 01:06:02 to 01:06:23
Extracted clip 13: 01:07:32 to 01:07:51
Extracted clip 14: 01:08:46 to 01:09:06
Extracted clip 15: 01:13:59 to 01:14:18
Extracted clip 16: 01:14:38 to 01:14:57
Extracted clip 17: 01:15:16 to 01:15:36
Extracted clip 18: 01:16:22 to 01:16:47
Extracted clip 19: 01:23:01 to 01:23:20
Extracted clip 20: 01:24:39 to 01:24:58
Extracted clip 21: 01:27:14 to 01:27:33
Extracted clip 22: 01:27:53 to 01:28:18
Extracted clip 23: 01:29:58 to 01:30:46
Extracted clip 24: 01:33:22 to 01:33:58
Extracted clip 25: 01:33:54 to 01:34:13
Extracted

# Clips Concatenation

In [32]:
import os
import subprocess

def concatenate_clips(clip_folder, output_file):
    """Join every ``.mp4`` clip in ``clip_folder`` into ``output_file``.

    Clips are ordered by the numbers in their file names (so ``clip2_2``
    comes before ``clip2_10``) and ``output_file`` itself is left out, which
    keeps a re-run from concatenating the previous result into the new one.
    The ffmpeg concat list is written to ``clip_list.txt`` inside
    ``clip_folder``.

    Args:
        clip_folder: Directory containing the clips.
        output_file: Path of the concatenated video to write.

    Returns:
        True when ffmpeg succeeded, False when there were no clips or ffmpeg
        reported an error.
    """
    import re  # local import: this cell only imports os and subprocess

    def _natural_key(path):
        # Split the name into text and number tokens so numeric parts compare
        # as integers rather than character by character.
        tokens = re.split(r'(\d+)', os.path.basename(path))
        return [int(tok) if tok.isdigit() else tok.lower() for tok in tokens]

    output_abs = os.path.abspath(output_file)
    # Absolute paths: the concat demuxer resolves relative entries against
    # the list file's directory, not the current working directory.
    clips = [
        os.path.abspath(os.path.join(clip_folder, name))
        for name in os.listdir(clip_folder)
        if name.endswith('.mp4')
    ]
    clips = [clip for clip in clips if clip != output_abs]
    clips.sort(key=_natural_key)

    if not clips:
        print(f"No clips found in {clip_folder}")
        return False

    file_list_path = os.path.join(clip_folder, 'clip_list.txt')
    with open(file_list_path, 'w') as list_file:
        for clip in clips:
            # A single quote inside a quoted concat entry must be written as '\''
            escaped = clip.replace("'", "'\\''")
            list_file.write(f"file '{escaped}'\n")

    cmd = [
        'ffmpeg',
        '-nostdin',     # never wait for keyboard input inside a notebook
        '-y',           # overwrite the result of a previous run
        '-f', 'concat',
        '-safe', '0',
        '-i', file_list_path,
        '-c', 'copy',   # Stream copy, no re-encoding
        output_file,
    ]

    completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if completed.returncode != 0:
        last_line = completed.stderr.decode(errors='replace').strip().splitlines()[-1:]
        print(f"Failed to concatenate clips ({' '.join(last_line)})")
        return False

    print(f"Concatenated video saved to {output_file}")
    return True

# Stitch the extracted clips into one highlight reel
clip_folder = "/kaggle/working/new2/"
output_file = "/kaggle/working/new2/FullMatch2.mp4"
concatenate_clips(clip_folder=clip_folder, output_file=output_file)


Concatenated video saved to /kaggle/working/new2/FullMatch2.mp4
