In [1]:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class BinaryClassificationCNN(nn.Module):
        def __init__(self):
            super(BinaryClassificationCNN, self).__init__()
            
            # 첫 번째 Convolutional 레이어: 3개의 입력 채널(RGB), 32개의 출력 채널, 2*2 커널
            self.conv1 = nn.Conv2d(3, 32, kernel_size=2, padding=1)
            # 두 번째 Convolutional 레이어
            self.conv2 = nn.Conv2d(32, 64, kernel_size=2, padding=1)
            # 세 번째 Convolutional 레이어
            self.conv3 = nn.Conv2d(64, 128, kernel_size=2, padding=1)
            
            # Pooling 레이어
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
            
            # Dropout 레이어
            self.dropout = nn.Dropout(0.5)
            
            # 첫 번째 Fully Connected 레이어
            self.fc1 = nn.Linear(128 * 4 * 4, 128)
            
            # 두 번째 Fully Connected 레이어
            self.fc2 = nn.Linear(128, 1)

        def forward(self, x):
            # Convolutional 레이어와 활성화 함수 적용
            x = F.relu(self.conv1(x))
            x = self.pool(x)
            x = F.relu(self.conv2(x))
            x = self.pool(x)
            x = F.relu(self.conv3(x))
            x = self.pool(x)
            # Flatten
            x = x.view(-1, 128 * 4 *4)
            # Dropout 적용
            x = self.dropout(x)
            # Fully Connected 레이어와 활성화 함수 적용
            x = F.relu(self.fc1(x))
            x = self.dropout(x)
            # 출력 레이어
            x = torch.sigmoid(self.fc2(x))
            return x

    # 모델 인스턴스화
    model = BinaryClassificationCNN()

In [None]:
import os
from PIL import Image
from torchvision import transforms
import torch

directory = r'C:\Users\lwj01\HowFastTennisBallIs\novak_sinner_over_30'
save_directory = r'C:\Users\lwj01\HowFastTennisBallIs\y_v2'  # 저장할 경로

def sliding_window(image, step_size, window_size):
    """이미지 위에서 슬라이딩 윈도우를 생성하는 함수"""
    for y in range(0, image.size[1] - window_size[1], step_size):
        for x in range(0, image.size[0] - window_size[0], step_size):
            yield (x, y, image.crop((x, y, x + window_size[0], y + window_size[1])))

def detect_objects_per_image(model, image_path, window_size=(30, 30), step_size=10):
    """하나의 이미지에 대해 객체를 탐지하는 함수"""
    model.eval()  # 모델을 평가 모드로 설정
    transform = transforms.Compose([transforms.Resize(window_size), transforms.ToTensor()])

    best_score = 0
    best_window = None
    best_location = None

    image = Image.open(image_path).convert('RGB')
    for (x, y, window) in sliding_window(image, step_size, window_size):
        window_tensor = transform(window).unsqueeze(0)  # 모델에 입력하기 위해 차원 추가
        output = model(window_tensor)
        score = torch.sigmoid(output).item()

        if score > best_score:
            best_score = score
            best_window = window
            best_location = (x, y)

    return best_score, best_location, best_window

# 모든 이미지에 대해 객체 탐지 실행
for filename in os.listdir(directory):
    if filename.endswith('.jpg'):
        image_path = os.path.join(directory, filename)
        score, location, window = detect_objects_per_image(model, image_path)
        print(f"Image: {filename}, Best Score: {score}, Location: {location}")
        if window is not None:
            save_path = os.path.join(save_directory, f"detected_{filename}")
            window.save(save_path)  # 윈도우 이미지를 지정된 경로에 저장


Image: frame_000000.jpg, Best Score: 0.6219912171363831, Location: (1160, 50)
Image: frame_000001.jpg, Best Score: 0.6219987273216248, Location: (1160, 50)
Image: frame_000002.jpg, Best Score: 0.6219974756240845, Location: (1160, 50)
Image: frame_000003.jpg, Best Score: 0.6219929456710815, Location: (1160, 50)
Image: frame_000004.jpg, Best Score: 0.6219984292984009, Location: (1160, 50)
Image: frame_000005.jpg, Best Score: 0.6219986081123352, Location: (1160, 50)
Image: frame_000006.jpg, Best Score: 0.6219914555549622, Location: (370, 120)
Image: frame_000007.jpg, Best Score: 0.6219949722290039, Location: (1160, 50)
Image: frame_000008.jpg, Best Score: 0.6219949722290039, Location: (1160, 50)
Image: frame_000009.jpg, Best Score: 0.6219949722290039, Location: (1160, 50)
Image: frame_000010.jpg, Best Score: 0.6219949722290039, Location: (1160, 50)
Image: frame_000011.jpg, Best Score: 0.6219949722290039, Location: (1160, 50)
Image: frame_000012.jpg, Best Score: 0.6219949722290039, Locatio