In [47]:
import os
import cv2
import torch
import torch.nn as nn
import numpy as np
import csv

In [46]:
# Folder that is scanned for the .mp4 clips to run inference on.
input_folder = r"D:\Zebra_Fish_Dataset\novel tank cropped videos\output_clips" 
# Destination CSV written by the processing cells (one row per video).
csv_output_path = r"D:\Praharsha\code\CAMZ\data\interim\boundingboxes.csv"  # CSV file
# Destination .npy file that receives the same per-frame data as an array.
npy_output_path = r"D:\Praharsha\code\CAMZ\data\interim\boundingboxes_array.npy" # NPY file

In [14]:
# Function to convert YOLO format to pixel coordinates
def yolo_to_pixel_coords(box, frame_width, frame_height):
    """Turn a normalised centre-format box into pixel corner coordinates.

    Args:
        box: Four values ``(cx, cy, w, h)`` expressed as fractions of the frame.
        frame_width: Frame width in pixels.
        frame_height: Frame height in pixels.

    Returns:
        Tuple ``(x1, y1, x2, y2)`` of ints: the top-left and bottom-right
        corners. Values are truncated toward zero by ``int`` and are not
        clamped to the frame.
    """
    center_x, center_y, box_w, box_h = box

    left = center_x - box_w / 2
    top = center_y - box_h / 2
    right = center_x + box_w / 2
    bottom = center_y + box_h / 2

    return (
        int(left * frame_width),
        int(top * frame_height),
        int(right * frame_width),
        int(bottom * frame_height),
    )

In [15]:
class CNNModel(nn.Module):
    """Three-stage convolutional regressor with a 4-value sigmoid head.

    The network takes a 3-channel image batch, applies three unpadded 3x3
    convolutions (32, 64, 128 channels), each followed by ReLU and 2x2 max
    pooling, pools the result to a fixed 8x8 grid, and passes the flattened
    features through three linear layers (128 -> 64 -> 4). The final sigmoid
    keeps every output in the range [0, 1].

    Attribute names are part of the checkpoint format (``state_dict`` keys),
    so they must not be renamed.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.pool1 = nn.MaxPool2d(2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.pool2 = nn.MaxPool2d(2, stride=2)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3)
        self.pool3 = nn.MaxPool2d(2, stride=2)
        # Despite its name this layer pools to an 8x8 grid, not to 1x1; it
        # fixes the feature-map size regardless of the input resolution.
        self.global_avg_pool = nn.AdaptiveAvgPool2d((8, 8))

        # Regression head.
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 4)

    def forward(self, x):
        """Map an image batch ``(N, 3, H, W)`` to ``(N, 4)`` values in [0, 1]."""
        conv_stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        for conv, pool in conv_stages:
            x = pool(torch.relu(conv(x)))

        x = self.flatten(self.global_avg_pool(x))

        for hidden in (self.fc1, self.fc2):
            x = torch.relu(hidden(x))

        return torch.sigmoid(self.fc3(x))

In [17]:
def load_checkpoint(checkpoint, architecture, optimizer=None, device=None):
    """Rebuild a model from a checkpoint file and return it in eval mode.

    Args:
        checkpoint: Path (or file-like object) accepted by ``torch.load``. The
            stored object must be a mapping with a ``'state_dict'`` entry and,
            when ``optimizer`` is given, an ``'optimizer'`` entry.
        architecture: Zero-argument callable (typically the model class) that
            builds an uninitialised model instance.
        optimizer: Optional optimizer whose state is restored in place. Pass
            ``None`` for inference-only loading.
        device: Target device (string or ``torch.device``). Defaults to CUDA
            when available and CPU otherwise, so the function also works on
            machines without a GPU.

    Returns:
        The model with restored weights, moved to ``device`` and switched to
        evaluation mode.

    Raises:
        KeyError: If the checkpoint lacks a required entry.
    """
    print("loading checkpoint...")

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    # map_location lets a checkpoint saved on a GPU be restored on a CPU-only
    # machine instead of failing while deserialising CUDA tensors.
    # NOTE(review): torch.load unpickles the file, which can execute arbitrary
    # code; only load checkpoints from a trusted source (or pass
    # weights_only=True on PyTorch versions that support it).
    state = torch.load(checkpoint, map_location=device)

    model = architecture()
    model.load_state_dict(state['state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(state['optimizer'])

    model = model.to(device)
    return model.eval()

In [18]:
# Device used by the processing cells when moving input tensors.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_path = r"D:\Praharsha\code\CAMZ\models\model_history\1.0-CNN\CNN_checkpoint.pth.tar"

# A temporary CNNModel instance only provides parameters for building the
# Adam optimizer that load_checkpoint expects; it is replaced just below.
model = CNNModel()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# load_checkpoint builds a fresh CNNModel, restores the saved weights and the
# optimizer state, and returns the model in eval mode.
model = load_checkpoint(checkpoint=model_path, architecture=CNNModel, optimizer=optimizer)
model.eval()

loading checkpoint...


  checkpoint = torch.load(checkpoint)


CNNModel(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (global_avg_pool): AdaptiveAvgPool2d(output_size=(8, 8))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=8192, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=4, bias=True)
)

In [45]:
def yolo_to_center_coords(box):
    """Return the box centre as ``[cx, cy]``, each rounded to 4 decimals.

    Args:
        box: Iterable of exactly four values ``(cx, cy, w, h)``; the last two
            are ignored.

    Returns:
        A two-element list ``[cx, cy]``.
    """
    center_x, center_y, _width, _height = box
    return [round(coord, 4) for coord in (center_x, center_y)]

# Run the model on up to `max_frames` frames of every .mp4 clip found in
# `input_folder` and write one CSV row per clip: the file name followed by one
# [cx, cy] cell per frame ([None, None] where the clip had no frame to read).
video_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".mp4")])

with open(csv_output_path, "w", newline="") as csv_file:
    csv_writer = csv.writer(csv_file)
    max_frames = 300  # fixed number of frame columns per row
    header = ["video_name"] + [f"frame_{i+1}" for i in range(max_frames)]
    csv_writer.writerow(header)
    for video_file in video_files:
        input_video = os.path.join(input_folder, video_file)
        cap = cv2.VideoCapture(input_video)
        print(f"Processing: {video_file}...")
        video_center_data = [video_file]
        frame_count = 0
        try:
            if not cap.isOpened():
                # Without this message an unreadable clip would silently turn
                # into a row made only of [None, None] cells.
                print(f"Warning: could not open {video_file}; its row will contain only [None, None].")
            while frame_count < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break
                # NOTE(review): OpenCV decodes frames in BGR channel order —
                # confirm this matches the channel order used during training.
                resized_frame = cv2.resize(frame, (224, 224))
                # HWC uint8 -> 1xCxHxW float scaled to [0, 1].
                img_tensor = torch.from_numpy(resized_frame).permute(2, 0, 1).float().unsqueeze(0) / 255.0
                img_tensor = img_tensor.to(device)
                with torch.no_grad():
                    preds = model(img_tensor)
                # The batch always holds exactly one frame (unsqueeze(0) above),
                # so preds[0] is always present and no "no detection" branch
                # is needed.
                center_coords = yolo_to_center_coords(preds[0, :4].tolist())
                video_center_data.append(center_coords)
                frame_count += 1
        finally:
            # Release the decoder even when reading or inference raises.
            cap.release()

        # Pad short clips so every row has exactly max_frames frame cells.
        missing = max_frames + 1 - len(video_center_data)
        video_center_data.extend([None, None] for _ in range(missing))

        csv_writer.writerow(video_center_data)
print(f"All videos processed! Bounding centers saved to {csv_output_path}")

Processing: 10Rcrop_1.mp4...
Processing: 10Rcrop_10.mp4...
Processing: 10Rcrop_11.mp4...
Processing: 10Rcrop_12.mp4...
Processing: 10Rcrop_13.mp4...
Processing: 10Rcrop_14.mp4...
Processing: 10Rcrop_15.mp4...
Processing: 10Rcrop_16.mp4...
Processing: 10Rcrop_17.mp4...
Processing: 10Rcrop_18.mp4...
Processing: 10Rcrop_19.mp4...
Processing: 10Rcrop_2.mp4...
Processing: 10Rcrop_20.mp4...
Processing: 10Rcrop_21.mp4...
Processing: 10Rcrop_22.mp4...
Processing: 10Rcrop_23.mp4...
Processing: 10Rcrop_24.mp4...
Processing: 10Rcrop_25.mp4...
Processing: 10Rcrop_26.mp4...
Processing: 10Rcrop_27.mp4...
Processing: 10Rcrop_28.mp4...
Processing: 10Rcrop_29.mp4...
Processing: 10Rcrop_3.mp4...
Processing: 10Rcrop_30.mp4...
Processing: 10Rcrop_31.mp4...
Processing: 10Rcrop_32.mp4...
Processing: 10Rcrop_33.mp4...
Processing: 10Rcrop_34.mp4...
Processing: 10Rcrop_35.mp4...
Processing: 10Rcrop_4.mp4...
Processing: 10Rcrop_5.mp4...
Processing: 10Rcrop_6.mp4...
Processing: 10Rcrop_7.mp4...
Processing: 10Rcr

In [48]:
def yolo_to_center_coords(box):
    """Extract the rounded centre point from a four-value box.

    Args:
        box: Iterable of exactly four values ``(cx, cy, w, h)``; width and
            height are discarded.

    Returns:
        ``[cx, cy]`` as a list (not a string), each value rounded to four
        decimal places.
    """
    center_x, center_y, _, _ = box
    rounded_x = round(center_x, 4)
    rounded_y = round(center_y, 4)
    return [rounded_x, rounded_y]

# Process videos: run the model on up to `max_frames` frames of every .mp4
# clip in `input_folder`, then store the per-frame [cx, cy] centres both as a
# CSV (one row per clip, prefixed with the file name) and as a .npy array.
video_files = sorted([f for f in os.listdir(input_folder) if f.endswith(".mp4")])

# Store data for .npy file (one list of per-frame centres per video, in the
# same order as `video_files`; the video name itself is not stored).
all_data = []

with open(csv_output_path, "w", newline="") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write header: video_name, frame_1, frame_2, ...
    max_frames = 300  # Limit frames to 300
    header = ["video_name"] + [f"frame_{i+1}" for i in range(max_frames)]
    csv_writer.writerow(header)

    for video_file in video_files:
        input_video = os.path.join(input_folder, video_file)
        cap = cv2.VideoCapture(input_video)

        print(f"Processing: {video_file}...")

        video_center_data = [video_file]  # Start with video name
        frame_count = 0

        try:
            if not cap.isOpened():
                # Without this message an unreadable clip would silently turn
                # into a row made only of [None, None] cells.
                print(f"Warning: could not open {video_file}; its row will contain only [None, None].")

            while frame_count < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                # NOTE(review): OpenCV decodes frames in BGR channel order —
                # confirm this matches the channel order used during training.
                resized_frame = cv2.resize(frame, (224, 224))
                # HWC uint8 -> 1xCxHxW float scaled to [0, 1].
                img_tensor = torch.from_numpy(resized_frame).permute(2, 0, 1).float().unsqueeze(0) / 255.0
                img_tensor = img_tensor.to(device)

                with torch.no_grad():
                    preds = model(img_tensor)

                # The batch always holds exactly one frame (unsqueeze(0) above),
                # so preds[0] is always present and no "no detection" branch
                # is needed.
                center_coords = yolo_to_center_coords(preds[0, :4].tolist())

                video_center_data.append(center_coords)
                frame_count += 1
        finally:
            # Release the decoder even when reading or inference raises.
            cap.release()

        # Pad short clips so every row has exactly max_frames frame cells.
        missing = max_frames + 1 - len(video_center_data)
        video_center_data.extend([None, None] for _ in range(missing))

        csv_writer.writerow(video_center_data)
        all_data.append(video_center_data[1:])

# dtype=object keeps the None placeholders; as a consequence the file has to
# be read back with np.load(..., allow_pickle=True).
np.save(npy_output_path, np.array(all_data, dtype=object))

print(f"All videos processed! Bounding centers saved to {csv_output_path} and {npy_output_path}")


Processing: 10Rcrop_1.mp4...
Processing: 10Rcrop_10.mp4...
Processing: 10Rcrop_11.mp4...
Processing: 10Rcrop_12.mp4...
Processing: 10Rcrop_13.mp4...
Processing: 10Rcrop_14.mp4...
Processing: 10Rcrop_15.mp4...
Processing: 10Rcrop_16.mp4...
Processing: 10Rcrop_17.mp4...
Processing: 10Rcrop_18.mp4...
Processing: 10Rcrop_19.mp4...
Processing: 10Rcrop_2.mp4...
Processing: 10Rcrop_20.mp4...
Processing: 10Rcrop_21.mp4...
Processing: 10Rcrop_22.mp4...
Processing: 10Rcrop_23.mp4...
Processing: 10Rcrop_24.mp4...
Processing: 10Rcrop_25.mp4...
Processing: 10Rcrop_26.mp4...
Processing: 10Rcrop_27.mp4...
Processing: 10Rcrop_28.mp4...
Processing: 10Rcrop_29.mp4...
Processing: 10Rcrop_3.mp4...
Processing: 10Rcrop_30.mp4...
Processing: 10Rcrop_31.mp4...
Processing: 10Rcrop_32.mp4...
Processing: 10Rcrop_33.mp4...
Processing: 10Rcrop_34.mp4...
Processing: 10Rcrop_35.mp4...
Processing: 10Rcrop_4.mp4...
Processing: 10Rcrop_5.mp4...
Processing: 10Rcrop_6.mp4...
Processing: 10Rcrop_7.mp4...
Processing: 10Rcr