In [71]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from PIL import Image
from torchvision import transforms
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import os
from torchvision.io import read_image

In [72]:
# CSVファイルの読み込み
annotations = pd.read_csv("bananas_train_ano.csv", converters={"region_shape_attributes": eval})

annotations.head()

Unnamed: 0,filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes
0,b1.JPG,1865803,{},1,0,"{'name': 'polygon', 'all_points_x': [1294, 114...","{""banana"":""banana""}"
1,b2.JPG,1443106,{},1,0,"{'name': 'polygon', 'all_points_x': [2607, 229...","{""banana"":""banana""}"
2,b3.JPG,1391833,{},0,0,{},{}
3,b5.JPG,1483898,{},0,0,{},{}
4,b6.JPG,1415255,{},1,0,"{'name': 'polygon', 'all_points_x': [1200, 101...","{""banana"":""banana""}"


In [73]:
image_files = annotations["filename"]
region_raw_data=annotations["region_shape_attributes"]

In [74]:
# バウンディングボックス情報の取得
bounding_boxes = []
for shape_attr in region_raw_data:
    if shape_attr:  # {}空でない場合のみ処理を行う
        all_points_x = shape_attr["all_points_x"]  # キーが存在しない場合は空のリストを返す
        all_points_y = shape_attr["all_points_y"]  # キーが存在しない場合は空のリストを返す
        bounding_boxes.append((all_points_x, all_points_y))
    else:
        bounding_boxes.append(([0], [0]))  # 欠損値の場合は左のようなデフォルト値のリストを追加

print(bounding_boxes)

[([1294, 1149, 1433, 1500, 1089, 1258, 1784, 2135, 1954, 1585], [623, 762, 1131, 2014, 2915, 3048, 2764, 1996, 1185, 907]), ([2607, 2292, 2359, 1627, 284, 550, 1597, 2516, 2933, 2619], [569, 623, 1125, 1863, 2171, 2637, 2667, 2195, 1373, 1077]), ([0], [0]), ([0], [0]), ([1200, 1017, 1052, 1283, 1780, 2448, 3080, 3086, 2193, 1431], [1413, 1537, 1833, 2223, 2554, 2637, 2507, 2051, 2057, 1638]), ([1561, 1519, 1821, 2229, 2796, 3695, 3654, 2962, 2229, 1655], [2022, 2152, 2235, 1910, 1620, 1454, 1147, 946, 1129, 1667]), ([1117, 1212, 1620, 1981, 2406, 2607, 2572, 2211, 1561, 1188], [1555, 1744, 1579, 1525, 1638, 1608, 1413, 1129, 1171, 1354]), ([0], [0]), ([1649, 1939, 2318, 2856, 3086, 3068, 2720, 2371, 2022, 1679], [1117, 1626, 1785, 1898, 1785, 1638, 1443, 1336, 899, 384]), ([881, 1360, 1839, 2229, 2554, 2672, 2702, 2312, 1732, 1401], [1236, 1283, 1348, 1732, 2111, 2028, 1513, 1041, 863, 934]), ([0], [0]), ([792, 1016, 1439, 1833, 2498, 2589, 2462, 1893, 1403, 1034], [2268, 2431, 2195, 2

In [75]:
# x, yの最小値、最大値を格納したバウンディングボックスを作成する
bounding_boxes_minmax = []

for bounding_box in bounding_boxes:
    box_x = bounding_box[0]
    box_y = bounding_box[1]

    # バウンディングボックスの形式に変換 [xmin, ymin, xmax, ymax]
    xmin = min(box_x)
    ymin = min(box_y)
    xmax = max(box_x)
    ymax = max(box_y)
    
    if xmin == 0 and ymin == 0 and xmax == 0 and ymax == 0:
        bounding_boxes_minmax.append([1, 1, 2, 2]) #set bounding as defult
    else:
        bounding_boxes_minmax.append([xmin, ymin, xmax, ymax])

print(bounding_boxes_minmax)


[[1089, 623, 2135, 3048], [284, 569, 2933, 2667], [1, 1, 2, 2], [1, 1, 2, 2], [1017, 1413, 3086, 2637], [1519, 946, 3695, 2235], [1117, 1129, 2607, 1744], [1, 1, 2, 2], [1649, 384, 3086, 1898], [881, 863, 2702, 2111], [1, 1, 2, 2], [792, 1657, 2589, 2431], [1070, 1188, 2921, 2401], [1082, 928, 3051, 2057], [1, 1, 2, 2], [479, 514, 1762, 1519], [1862, 1188, 2601, 1892], [1041, 715, 3725, 2554], [1, 1, 2, 2], [656, 1106, 2944, 2578], [1276, 1077, 2050, 2643], [429, 1385, 2353, 3260], [1143, 738, 2262, 3405], [1, 1, 2, 2], [907, 1022, 1851, 3260], [1023, 1117, 3624, 2247], [1, 1, 2, 2], [2140, 225, 3512, 2448], [1, 1, 2, 2], [1077, 599, 1978, 2431], [733, 1017, 3287, 1992], [1685, 1419, 2607, 1821], [1709, 1431, 2489, 2241], [1413, 733, 2383, 1833], [1519, 603, 3482, 2530], [1, 1, 2, 2], [1638, 745, 3169, 2341], [1348, 1117, 2968, 2252], [1295, 1469, 2332, 2057], [625, 1863, 2339, 2754], [956, 1518, 1724, 3574], [865, 1137, 2038, 4016], [1, 1, 2, 2], [1, 1, 2, 2], [934, 378, 3920, 1561], 

In [76]:
# バナナの存在を示す labels リストを作成
labels = []

for bbox in bounding_boxes_minmax:
    # バウンディングボックスが存在しない場合（[0, 0, 0, 0]の場合）、labelを0に設定
    if bbox == [1, 1, 2, 2]:
        labels.append(0)
    else:
        labels.append(1)
        
print(labels)

[1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1]


In [77]:
# BananaDataset Classの作成
# imageパス、bounding_boxes_minmax,labelsの3つのデータを渡せる
class BananaDataset(Dataset):
    def __init__(self, image_files, bounding_boxes_minmax, labels,transform=None):
        self.image_files = image_files
        self.bounding_boxes_minmax = bounding_boxes_minmax
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        image = Image.open(self.image_files[idx]).convert("RGB")
        box = self.bounding_boxes_minmax[idx]  # bounding_boxesはすでに各画像に対するバウンディングボックスのリスト
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
        
        return image, box,label

In [78]:
# バナナ画像が格納されているディレクトリ
data_dir = "bananas_images_train"
# 画像ファイル名をフルパスに変換するヘルパー関数
def get_image_path(filename):
    return data_dir+'/'+filename

# データセットの作成
transform = transforms.Compose([transforms.ToTensor()])
dataset = BananaDataset([get_image_path(filename) for filename in image_files], bounding_boxes_minmax, labels,transform)


# データローダーの作成
data_loader = DataLoader(dataset, batch_size=1, shuffle=True)

# モデルのロード
model = fasterrcnn_resnet50_fpn(pretrained=True)
model.train()



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [79]:
for image, box,label in dataset:
    print(image)
    print(box)
    print(label)
    print("-------")

tensor([[[0.3176, 0.3176, 0.3176,  ..., 0.1647, 0.1569, 0.1490],
         [0.3294, 0.3255, 0.3216,  ..., 0.1608, 0.1569, 0.1529],
         [0.3373, 0.3294, 0.3294,  ..., 0.1490, 0.1569, 0.1569],
         ...,
         [0.2784, 0.2863, 0.2980,  ..., 0.0471, 0.0510, 0.0549],
         [0.2902, 0.2863, 0.2980,  ..., 0.0471, 0.0510, 0.0549],
         [0.2980, 0.2824, 0.2784,  ..., 0.0510, 0.0549, 0.0588]],

        [[0.3412, 0.3412, 0.3373,  ..., 0.1608, 0.1529, 0.1451],
         [0.3529, 0.3490, 0.3412,  ..., 0.1569, 0.1529, 0.1490],
         [0.3608, 0.3529, 0.3490,  ..., 0.1451, 0.1529, 0.1529],
         ...,
         [0.2235, 0.2275, 0.2392,  ..., 0.0314, 0.0353, 0.0392],
         [0.2314, 0.2275, 0.2353,  ..., 0.0314, 0.0353, 0.0392],
         [0.2392, 0.2235, 0.2157,  ..., 0.0353, 0.0392, 0.0431]],

        [[0.3333, 0.3255, 0.3216,  ..., 0.1412, 0.1333, 0.1255],
         [0.3451, 0.3412, 0.3255,  ..., 0.1373, 0.1333, 0.1294],
         [0.3529, 0.3451, 0.3333,  ..., 0.1255, 0.1333, 0.

tensor([[[0.4471, 0.4353, 0.4314,  ..., 0.2471, 0.2431, 0.2353],
         [0.4353, 0.4314, 0.4314,  ..., 0.2353, 0.2353, 0.2275],
         [0.4275, 0.4314, 0.4353,  ..., 0.2353, 0.2353, 0.2353],
         ...,
         [0.1294, 0.1373, 0.1451,  ..., 0.2353, 0.2275, 0.2353],
         [0.1373, 0.1412, 0.1529,  ..., 0.2353, 0.2314, 0.2392],
         [0.1647, 0.1804, 0.1804,  ..., 0.2431, 0.2471, 0.2471]],

        [[0.4196, 0.4078, 0.4039,  ..., 0.1529, 0.1529, 0.1451],
         [0.4078, 0.4039, 0.4039,  ..., 0.1412, 0.1451, 0.1373],
         [0.4000, 0.4039, 0.4078,  ..., 0.1412, 0.1451, 0.1451],
         ...,
         [0.1216, 0.1294, 0.1255,  ..., 0.1529, 0.1451, 0.1608],
         [0.1294, 0.1333, 0.1333,  ..., 0.1608, 0.1569, 0.1647],
         [0.1569, 0.1608, 0.1608,  ..., 0.1686, 0.1725, 0.1725]],

        [[0.3882, 0.3765, 0.3725,  ..., 0.0039, 0.0000, 0.0000],
         [0.3765, 0.3725, 0.3725,  ..., 0.0000, 0.0000, 0.0000],
         [0.3686, 0.3725, 0.3765,  ..., 0.0000, 0.0000, 0.

tensor([[[0.7333, 0.7373, 0.7412,  ..., 0.0627, 0.0627, 0.0627],
         [0.7255, 0.7294, 0.7333,  ..., 0.0627, 0.0627, 0.0627],
         [0.7255, 0.7294, 0.7333,  ..., 0.0667, 0.0627, 0.0627],
         ...,
         [0.7412, 0.7373, 0.7373,  ..., 0.7176, 0.7255, 0.7255],
         [0.7451, 0.7451, 0.7451,  ..., 0.7176, 0.7255, 0.7216],
         [0.7412, 0.7412, 0.7529,  ..., 0.7176, 0.7216, 0.7176]],

        [[0.6235, 0.6275, 0.6314,  ..., 0.0471, 0.0471, 0.0471],
         [0.6157, 0.6196, 0.6235,  ..., 0.0471, 0.0471, 0.0471],
         [0.6157, 0.6196, 0.6235,  ..., 0.0510, 0.0471, 0.0471],
         ...,
         [0.6000, 0.6039, 0.6118,  ..., 0.6000, 0.5961, 0.5961],
         [0.6039, 0.6039, 0.6157,  ..., 0.6000, 0.5961, 0.5922],
         [0.6039, 0.6039, 0.6235,  ..., 0.6000, 0.5922, 0.5882]],

        [[0.4353, 0.4392, 0.4431,  ..., 0.0000, 0.0000, 0.0000],
         [0.4275, 0.4314, 0.4353,  ..., 0.0000, 0.0000, 0.0000],
         [0.4275, 0.4314, 0.4353,  ..., 0.0039, 0.0000, 0.

tensor([[[0.1176, 0.1216, 0.1216,  ..., 0.0118, 0.0078, 0.0078],
         [0.1216, 0.1216, 0.1255,  ..., 0.0118, 0.0118, 0.0078],
         [0.1255, 0.1255, 0.1255,  ..., 0.0118, 0.0118, 0.0118],
         ...,
         [0.0431, 0.0471, 0.0471,  ..., 0.0431, 0.0471, 0.0471],
         [0.0471, 0.0471, 0.0510,  ..., 0.0431, 0.0471, 0.0471],
         [0.0510, 0.0510, 0.0510,  ..., 0.0431, 0.0471, 0.0510]],

        [[0.0941, 0.0980, 0.0980,  ..., 0.0118, 0.0078, 0.0078],
         [0.0980, 0.0980, 0.1020,  ..., 0.0118, 0.0118, 0.0078],
         [0.1020, 0.1020, 0.1020,  ..., 0.0118, 0.0118, 0.0118],
         ...,
         [0.0314, 0.0353, 0.0353,  ..., 0.0392, 0.0431, 0.0431],
         [0.0353, 0.0353, 0.0392,  ..., 0.0392, 0.0431, 0.0431],
         [0.0392, 0.0392, 0.0392,  ..., 0.0392, 0.0431, 0.0471]],

        [[0.0314, 0.0353, 0.0353,  ..., 0.0118, 0.0078, 0.0078],
         [0.0353, 0.0353, 0.0392,  ..., 0.0118, 0.0118, 0.0078],
         [0.0392, 0.0392, 0.0392,  ..., 0.0118, 0.0118, 0.

tensor([[[0.7569, 0.7608, 0.7608,  ..., 1.0000, 1.0000, 1.0000],
         [0.7686, 0.7686, 0.7647,  ..., 1.0000, 1.0000, 1.0000],
         [0.7765, 0.7725, 0.7725,  ..., 1.0000, 1.0000, 1.0000],
         ...,
         [0.7569, 0.7529, 0.7569,  ..., 0.4863, 0.4902, 0.4980],
         [0.7529, 0.7451, 0.7490,  ..., 0.4863, 0.4902, 0.4980],
         [0.7529, 0.7412, 0.7412,  ..., 0.4824, 0.4902, 0.4902]],

        [[0.6392, 0.6431, 0.6431,  ..., 0.6667, 0.6667, 0.6706],
         [0.6510, 0.6510, 0.6471,  ..., 0.6667, 0.6706, 0.6745],
         [0.6588, 0.6549, 0.6549,  ..., 0.6706, 0.6745, 0.6784],
         ...,
         [0.5686, 0.5647, 0.5686,  ..., 0.4157, 0.4157, 0.4118],
         [0.5647, 0.5569, 0.5608,  ..., 0.4196, 0.4157, 0.4118],
         [0.5647, 0.5529, 0.5529,  ..., 0.4157, 0.4157, 0.4157]],

        [[0.4039, 0.4078, 0.4078,  ..., 0.1373, 0.1373, 0.1333],
         [0.4157, 0.4157, 0.4118,  ..., 0.1373, 0.1412, 0.1451],
         [0.4235, 0.4196, 0.4196,  ..., 0.1412, 0.1451, 0.

tensor([[[0.5686, 0.5725, 0.5765,  ..., 0.5294, 0.5294, 0.5412],
         [0.5647, 0.5686, 0.5725,  ..., 0.5294, 0.5255, 0.5333],
         [0.5608, 0.5647, 0.5686,  ..., 0.5294, 0.5255, 0.5294],
         ...,
         [0.5216, 0.5294, 0.5333,  ..., 0.4510, 0.4431, 0.4431],
         [0.5098, 0.5216, 0.5294,  ..., 0.4471, 0.4431, 0.4471],
         [0.5059, 0.5176, 0.5294,  ..., 0.4471, 0.4431, 0.4510]],

        [[0.5412, 0.5451, 0.5490,  ..., 0.5020, 0.5020, 0.5137],
         [0.5373, 0.5412, 0.5451,  ..., 0.5020, 0.4980, 0.5059],
         [0.5333, 0.5373, 0.5412,  ..., 0.5020, 0.4980, 0.5020],
         ...,
         [0.4863, 0.4941, 0.4980,  ..., 0.4275, 0.4196, 0.4196],
         [0.4745, 0.4863, 0.4941,  ..., 0.4235, 0.4196, 0.4235],
         [0.4706, 0.4824, 0.4941,  ..., 0.4235, 0.4196, 0.4275]],

        [[0.5098, 0.5137, 0.5176,  ..., 0.4706, 0.4706, 0.4824],
         [0.5059, 0.5098, 0.5137,  ..., 0.4706, 0.4667, 0.4745],
         [0.5020, 0.5059, 0.5098,  ..., 0.4706, 0.4667, 0.

tensor([[[0.6275, 0.6314, 0.6314,  ..., 0.4824, 0.4784, 0.4784],
         [0.6235, 0.6275, 0.6275,  ..., 0.4824, 0.4784, 0.4784],
         [0.6235, 0.6196, 0.6196,  ..., 0.4784, 0.4745, 0.4745],
         ...,
         [0.6000, 0.5961, 0.5922,  ..., 0.3412, 0.3490, 0.3490],
         [0.6000, 0.5922, 0.5843,  ..., 0.3490, 0.3569, 0.3569],
         [0.6039, 0.5922, 0.5804,  ..., 0.3373, 0.3451, 0.3490]],

        [[0.5529, 0.5569, 0.5569,  ..., 0.4431, 0.4392, 0.4392],
         [0.5490, 0.5529, 0.5529,  ..., 0.4431, 0.4392, 0.4392],
         [0.5490, 0.5451, 0.5451,  ..., 0.4392, 0.4353, 0.4353],
         ...,
         [0.5137, 0.5098, 0.5059,  ..., 0.3176, 0.3255, 0.3255],
         [0.5137, 0.5059, 0.4980,  ..., 0.3255, 0.3333, 0.3333],
         [0.5176, 0.5059, 0.4941,  ..., 0.3137, 0.3216, 0.3255]],

        [[0.3882, 0.3922, 0.3922,  ..., 0.3059, 0.3020, 0.3020],
         [0.3843, 0.3882, 0.3882,  ..., 0.3059, 0.3020, 0.3020],
         [0.3843, 0.3804, 0.3804,  ..., 0.2980, 0.2941, 0.

tensor([[[0.6706, 0.6706, 0.6667,  ..., 0.5373, 0.5333, 0.5255],
         [0.6706, 0.6706, 0.6667,  ..., 0.5333, 0.5294, 0.5255],
         [0.6706, 0.6706, 0.6667,  ..., 0.5333, 0.5333, 0.5294],
         ...,
         [0.6431, 0.6471, 0.6431,  ..., 0.4549, 0.4510, 0.4431],
         [0.6431, 0.6471, 0.6471,  ..., 0.4510, 0.4510, 0.4471],
         [0.6431, 0.6471, 0.6471,  ..., 0.4510, 0.4549, 0.4588]],

        [[0.5882, 0.5882, 0.5843,  ..., 0.4745, 0.4706, 0.4627],
         [0.5882, 0.5882, 0.5843,  ..., 0.4706, 0.4667, 0.4627],
         [0.5882, 0.5882, 0.5843,  ..., 0.4706, 0.4706, 0.4667],
         ...,
         [0.5922, 0.5961, 0.5922,  ..., 0.3725, 0.3686, 0.3608],
         [0.5922, 0.5961, 0.5961,  ..., 0.3686, 0.3686, 0.3647],
         [0.5922, 0.5961, 0.5961,  ..., 0.3686, 0.3725, 0.3765]],

        [[0.5059, 0.5059, 0.5020,  ..., 0.3725, 0.3686, 0.3608],
         [0.5059, 0.5059, 0.5020,  ..., 0.3686, 0.3647, 0.3608],
         [0.5059, 0.5059, 0.5020,  ..., 0.3686, 0.3686, 0.

In [80]:
for image, box,label in data_loader:
    print(image)
    print(box)
    print(label)
    print("-------")

tensor([[[[0.7333, 0.7373, 0.7333,  ..., 0.7882, 0.7961, 0.8039],
          [0.7333, 0.7333, 0.7294,  ..., 0.7882, 0.7961, 0.8000],
          [0.7333, 0.7294, 0.7294,  ..., 0.7922, 0.7961, 0.7961],
          ...,
          [0.7804, 0.7843, 0.7882,  ..., 0.0392, 0.0392, 0.0431],
          [0.7843, 0.7882, 0.7922,  ..., 0.0353, 0.0431, 0.0471],
          [0.7882, 0.7961, 0.8000,  ..., 0.0353, 0.0431, 0.0510]],

         [[0.6510, 0.6549, 0.6510,  ..., 0.6980, 0.7059, 0.7137],
          [0.6510, 0.6510, 0.6471,  ..., 0.6980, 0.7059, 0.7098],
          [0.6510, 0.6471, 0.6471,  ..., 0.7020, 0.7059, 0.7059],
          ...,
          [0.6941, 0.6980, 0.7020,  ..., 0.0353, 0.0353, 0.0392],
          [0.6980, 0.7020, 0.7059,  ..., 0.0314, 0.0392, 0.0431],
          [0.7020, 0.7098, 0.7137,  ..., 0.0314, 0.0392, 0.0471]],

         [[0.5294, 0.5333, 0.5294,  ..., 0.5765, 0.5843, 0.5922],
          [0.5294, 0.5294, 0.5255,  ..., 0.5765, 0.5843, 0.5882],
          [0.5294, 0.5255, 0.5255,  ..., 0

tensor([[[[0.5412, 0.5412, 0.5373,  ..., 0.2627, 0.2745, 0.2784],
          [0.5373, 0.5412, 0.5412,  ..., 0.2627, 0.2627, 0.2549],
          [0.5451, 0.5490, 0.5490,  ..., 0.2784, 0.2667, 0.2431],
          ...,
          [0.6706, 0.6745, 0.6784,  ..., 0.3059, 0.3098, 0.3059],
          [0.6667, 0.6706, 0.6745,  ..., 0.3020, 0.3098, 0.3059],
          [0.6706, 0.6706, 0.6667,  ..., 0.3059, 0.3098, 0.3137]],

         [[0.4471, 0.4510, 0.4471,  ..., 0.1804, 0.1922, 0.1961],
          [0.4471, 0.4510, 0.4510,  ..., 0.1804, 0.1804, 0.1725],
          [0.4471, 0.4510, 0.4510,  ..., 0.1882, 0.1765, 0.1529],
          ...,
          [0.4314, 0.4353, 0.4392,  ..., 0.2824, 0.2745, 0.2706],
          [0.4275, 0.4314, 0.4353,  ..., 0.2784, 0.2745, 0.2706],
          [0.4314, 0.4314, 0.4275,  ..., 0.2706, 0.2745, 0.2706]],

         [[0.3373, 0.3294, 0.3255,  ..., 0.1059, 0.1098, 0.1137],
          [0.3255, 0.3294, 0.3255,  ..., 0.1059, 0.1059, 0.0980],
          [0.3294, 0.3294, 0.3294,  ..., 0

tensor([[[[0.4745, 0.4706, 0.4667,  ..., 0.2980, 0.3333, 0.2588],
          [0.4745, 0.4706, 0.4667,  ..., 0.2706, 0.3255, 0.2588],
          [0.4824, 0.4745, 0.4706,  ..., 0.2392, 0.2902, 0.2510],
          ...,
          [0.7137, 0.7059, 0.7020,  ..., 0.2980, 0.2980, 0.3020],
          [0.7059, 0.7059, 0.7059,  ..., 0.2980, 0.3020, 0.3059],
          [0.7020, 0.7059, 0.7059,  ..., 0.2980, 0.3020, 0.3098]],

         [[0.3529, 0.3490, 0.3451,  ..., 0.2471, 0.3059, 0.2314],
          [0.3529, 0.3490, 0.3451,  ..., 0.2196, 0.2863, 0.2314],
          [0.3608, 0.3529, 0.3490,  ..., 0.1882, 0.2471, 0.2118],
          ...,
          [0.6000, 0.5922, 0.5882,  ..., 0.2118, 0.2118, 0.2157],
          [0.5922, 0.5922, 0.5922,  ..., 0.2118, 0.2157, 0.2196],
          [0.5882, 0.5922, 0.5922,  ..., 0.2118, 0.2157, 0.2235]],

         [[0.1294, 0.1255, 0.1216,  ..., 0.0745, 0.1020, 0.0157],
          [0.1294, 0.1255, 0.1216,  ..., 0.0471, 0.0941, 0.0196],
          [0.1373, 0.1294, 0.1255,  ..., 0

tensor([[[[0.0588, 0.0588, 0.0627,  ..., 0.1569, 0.1529, 0.1490],
          [0.0627, 0.0588, 0.0588,  ..., 0.1569, 0.1569, 0.1529],
          [0.0627, 0.0588, 0.0588,  ..., 0.1608, 0.1647, 0.1608],
          ...,
          [0.3569, 0.3569, 0.3529,  ..., 0.2039, 0.1882, 0.1725],
          [0.3843, 0.3608, 0.3451,  ..., 0.2039, 0.1961, 0.1922],
          [0.4157, 0.3765, 0.3608,  ..., 0.2039, 0.2078, 0.2078]],

         [[0.0549, 0.0549, 0.0588,  ..., 0.1216, 0.1176, 0.1137],
          [0.0588, 0.0549, 0.0549,  ..., 0.1216, 0.1216, 0.1176],
          [0.0588, 0.0549, 0.0549,  ..., 0.1255, 0.1294, 0.1255],
          ...,
          [0.2667, 0.2667, 0.2627,  ..., 0.1373, 0.1216, 0.1059],
          [0.2941, 0.2706, 0.2549,  ..., 0.1373, 0.1294, 0.1255],
          [0.3255, 0.2863, 0.2706,  ..., 0.1373, 0.1412, 0.1412]],

         [[0.0471, 0.0471, 0.0510,  ..., 0.1020, 0.0980, 0.0941],
          [0.0510, 0.0471, 0.0471,  ..., 0.1020, 0.1020, 0.0980],
          [0.0510, 0.0471, 0.0471,  ..., 0

tensor([[[[0.8078, 0.8078, 0.8078,  ..., 0.6863, 0.6863, 0.6863],
          [0.8078, 0.8078, 0.8078,  ..., 0.6863, 0.6863, 0.6902],
          [0.8039, 0.8039, 0.8078,  ..., 0.6863, 0.6902, 0.6941],
          ...,
          [0.7569, 0.7569, 0.7608,  ..., 0.0667, 0.0627, 0.0627],
          [0.7647, 0.7608, 0.7647,  ..., 0.0627, 0.0627, 0.0667],
          [0.7686, 0.7686, 0.7686,  ..., 0.0588, 0.0627, 0.0667]],

         [[0.7373, 0.7373, 0.7373,  ..., 0.6392, 0.6392, 0.6392],
          [0.7373, 0.7373, 0.7373,  ..., 0.6392, 0.6392, 0.6431],
          [0.7333, 0.7333, 0.7373,  ..., 0.6392, 0.6431, 0.6471],
          ...,
          [0.6902, 0.6902, 0.6941,  ..., 0.0471, 0.0431, 0.0431],
          [0.6980, 0.6941, 0.6980,  ..., 0.0431, 0.0431, 0.0471],
          [0.7020, 0.7020, 0.7020,  ..., 0.0392, 0.0431, 0.0471]],

         [[0.6510, 0.6510, 0.6588,  ..., 0.5529, 0.5529, 0.5529],
          [0.6510, 0.6588, 0.6588,  ..., 0.5529, 0.5529, 0.5569],
          [0.6471, 0.6549, 0.6588,  ..., 0

tensor([[[[0.7216, 0.7216, 0.7216,  ..., 0.0471, 0.0627, 0.0745],
          [0.7176, 0.7176, 0.7137,  ..., 0.0627, 0.0706, 0.0745],
          [0.7137, 0.7137, 0.7098,  ..., 0.0745, 0.0706, 0.0627],
          ...,
          [0.7569, 0.7569, 0.7529,  ..., 0.5922, 0.5922, 0.5922],
          [0.7569, 0.7569, 0.7529,  ..., 0.5922, 0.5922, 0.5922],
          [0.7569, 0.7569, 0.7529,  ..., 0.5922, 0.5882, 0.5882]],

         [[0.5490, 0.5490, 0.5490,  ..., 0.0431, 0.0588, 0.0706],
          [0.5451, 0.5451, 0.5412,  ..., 0.0588, 0.0667, 0.0706],
          [0.5412, 0.5412, 0.5373,  ..., 0.0706, 0.0667, 0.0588],
          ...,
          [0.5843, 0.5843, 0.5804,  ..., 0.4314, 0.4314, 0.4314],
          [0.5843, 0.5843, 0.5804,  ..., 0.4314, 0.4314, 0.4314],
          [0.5843, 0.5843, 0.5804,  ..., 0.4314, 0.4275, 0.4275]],

         [[0.4039, 0.4039, 0.4039,  ..., 0.0235, 0.0392, 0.0510],
          [0.4000, 0.4000, 0.3961,  ..., 0.0392, 0.0471, 0.0510],
          [0.3961, 0.3961, 0.3922,  ..., 0

tensor([[[[0.2314, 0.2275, 0.2275,  ..., 0.1373, 0.1373, 0.1412],
          [0.2275, 0.2275, 0.2235,  ..., 0.1373, 0.1373, 0.1373],
          [0.2235, 0.2235, 0.2235,  ..., 0.1373, 0.1333, 0.1333],
          ...,
          [0.2039, 0.2039, 0.2000,  ..., 0.1294, 0.1294, 0.1294],
          [0.2118, 0.2078, 0.2039,  ..., 0.1333, 0.1294, 0.1294],
          [0.2196, 0.2118, 0.2078,  ..., 0.1333, 0.1294, 0.1294]],

         [[0.1843, 0.1804, 0.1804,  ..., 0.1137, 0.1137, 0.1216],
          [0.1804, 0.1804, 0.1765,  ..., 0.1137, 0.1137, 0.1137],
          [0.1765, 0.1765, 0.1765,  ..., 0.1137, 0.1098, 0.1098],
          ...,
          [0.1490, 0.1490, 0.1451,  ..., 0.1216, 0.1216, 0.1216],
          [0.1569, 0.1529, 0.1490,  ..., 0.1255, 0.1216, 0.1216],
          [0.1647, 0.1569, 0.1529,  ..., 0.1255, 0.1216, 0.1216]],

         [[0.0824, 0.0784, 0.0784,  ..., 0.0118, 0.0118, 0.0078],
          [0.0784, 0.0784, 0.0745,  ..., 0.0118, 0.0118, 0.0118],
          [0.0745, 0.0745, 0.0745,  ..., 0

tensor([[[[0.6706, 0.6706, 0.6667,  ..., 0.5373, 0.5333, 0.5255],
          [0.6706, 0.6706, 0.6667,  ..., 0.5333, 0.5294, 0.5255],
          [0.6706, 0.6706, 0.6667,  ..., 0.5333, 0.5333, 0.5294],
          ...,
          [0.6431, 0.6471, 0.6431,  ..., 0.4549, 0.4510, 0.4431],
          [0.6431, 0.6471, 0.6471,  ..., 0.4510, 0.4510, 0.4471],
          [0.6431, 0.6471, 0.6471,  ..., 0.4510, 0.4549, 0.4588]],

         [[0.5882, 0.5882, 0.5843,  ..., 0.4745, 0.4706, 0.4627],
          [0.5882, 0.5882, 0.5843,  ..., 0.4706, 0.4667, 0.4627],
          [0.5882, 0.5882, 0.5843,  ..., 0.4706, 0.4706, 0.4667],
          ...,
          [0.5922, 0.5961, 0.5922,  ..., 0.3725, 0.3686, 0.3608],
          [0.5922, 0.5961, 0.5961,  ..., 0.3686, 0.3686, 0.3647],
          [0.5922, 0.5961, 0.5961,  ..., 0.3686, 0.3725, 0.3765]],

         [[0.5059, 0.5059, 0.5020,  ..., 0.3725, 0.3686, 0.3608],
          [0.5059, 0.5059, 0.5020,  ..., 0.3686, 0.3647, 0.3608],
          [0.5059, 0.5059, 0.5020,  ..., 0

tensor([[[[0.0824, 0.0824, 0.0824,  ..., 0.0392, 0.0392, 0.0392],
          [0.0824, 0.0784, 0.0824,  ..., 0.0392, 0.0392, 0.0392],
          [0.0784, 0.0784, 0.0784,  ..., 0.0392, 0.0353, 0.0353],
          ...,
          [0.0235, 0.0235, 0.0275,  ..., 0.0078, 0.0078, 0.0078],
          [0.0275, 0.0275, 0.0275,  ..., 0.0078, 0.0078, 0.0078],
          [0.0275, 0.0275, 0.0275,  ..., 0.0078, 0.0078, 0.0078]],

         [[0.0706, 0.0706, 0.0706,  ..., 0.0353, 0.0353, 0.0353],
          [0.0706, 0.0667, 0.0706,  ..., 0.0353, 0.0353, 0.0353],
          [0.0667, 0.0667, 0.0667,  ..., 0.0353, 0.0314, 0.0314],
          ...,
          [0.0157, 0.0157, 0.0196,  ..., 0.0078, 0.0078, 0.0078],
          [0.0196, 0.0196, 0.0196,  ..., 0.0078, 0.0078, 0.0078],
          [0.0196, 0.0196, 0.0196,  ..., 0.0078, 0.0078, 0.0078]],

         [[0.0118, 0.0118, 0.0118,  ..., 0.0196, 0.0196, 0.0196],
          [0.0118, 0.0078, 0.0118,  ..., 0.0196, 0.0196, 0.0196],
          [0.0078, 0.0078, 0.0078,  ..., 0

In [81]:
# デバイスの指定
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# オプティマイザの設定
optimizer = torch.optim.SGD(model.parameters(), lr=0.005)

# 学習のループ
for image_tensors, targets,label in data_loader:
    image_tensors = list(image.to(device) for image in image_tensors)

    # ターゲットの形式を正しく組み立てる
    target_list = []

    # バウンディングボックスをテンソルに変換
    target_tensor = {
        "boxes": torch.tensor([targets], dtype=torch.float32, device=device),
        "labels": torch.tensor([label], dtype=torch.int64),
        "image_id": torch.tensor([[]], dtype=torch.int64),
        "area": torch.tensor([[]], dtype=torch.float32),
        "iscrowd": torch.tensor([[]], dtype=torch.int64),
    }
    target_list.append(target_tensor)
        

#     print(targets)
#     print("------")
#     このモデルは物体が存在しない画像はのぞいて学習させた
    optimizer.zero_grad()
    loss_dict = model(image_tensors, target_list)
    losses = sum(loss for loss in loss_dict.values())
    losses.backward()
    optimizer.step()

    print(target_tensor)

{'boxes': tensor([[1685., 1419., 2607., 1821.]]), 'labels': tensor([1]), 'image_id': tensor([], size=(1, 0), dtype=torch.int64), 'area': tensor([], size=(1, 0)), 'iscrowd': tensor([], size=(1, 0), dtype=torch.int64)}
{'boxes': tensor([[1295., 1469., 2332., 2057.]]), 'labels': tensor([1]), 'image_id': tensor([], size=(1, 0), dtype=torch.int64), 'area': tensor([], size=(1, 0)), 'iscrowd': tensor([], size=(1, 0), dtype=torch.int64)}
{'boxes': tensor([[ 429., 1385., 2353., 3260.]]), 'labels': tensor([1]), 'image_id': tensor([], size=(1, 0), dtype=torch.int64), 'area': tensor([], size=(1, 0)), 'iscrowd': tensor([], size=(1, 0), dtype=torch.int64)}
{'boxes': tensor([[1023., 1117., 3624., 2247.]]), 'labels': tensor([1]), 'image_id': tensor([], size=(1, 0), dtype=torch.int64), 'area': tensor([], size=(1, 0)), 'iscrowd': tensor([], size=(1, 0), dtype=torch.int64)}
{'boxes': tensor([[1., 1., 2., 2.]]), 'labels': tensor([0]), 'image_id': tensor([], size=(1, 0), dtype=torch.int64), 'area': tensor(

{'boxes': tensor([[1., 1., 2., 2.]]), 'labels': tensor([0]), 'image_id': tensor([], size=(1, 0), dtype=torch.int64), 'area': tensor([], size=(1, 0)), 'iscrowd': tensor([], size=(1, 0), dtype=torch.int64)}
{'boxes': tensor([[ 922.,  881., 3695., 2028.]]), 'labels': tensor([1]), 'image_id': tensor([], size=(1, 0), dtype=torch.int64), 'area': tensor([], size=(1, 0)), 'iscrowd': tensor([], size=(1, 0), dtype=torch.int64)}
{'boxes': tensor([[1519.,  603., 3482., 2530.]]), 'labels': tensor([1]), 'image_id': tensor([], size=(1, 0), dtype=torch.int64), 'area': tensor([], size=(1, 0)), 'iscrowd': tensor([], size=(1, 0), dtype=torch.int64)}
{'boxes': tensor([[1., 1., 2., 2.]]), 'labels': tensor([0]), 'image_id': tensor([], size=(1, 0), dtype=torch.int64), 'area': tensor([], size=(1, 0)), 'iscrowd': tensor([], size=(1, 0), dtype=torch.int64)}
{'boxes': tensor([[1017., 1413., 3086., 2637.]]), 'labels': tensor([1]), 'image_id': tensor([], size=(1, 0), dtype=torch.int64), 'area': tensor([], size=(1,

In [114]:
import torchvision.transforms as T
from PIL import Image, ImageDraw

# 1. 画像の前処理
def preprocess_image(image):
    transform = T.Compose([
        T.ToTensor(),  # 画像をテンソルに変換
    ])
    return transform(image).unsqueeze(0)  # バッチ次元を追加

# 2. モデルに画像を渡して推論
def predict(model, image_tensor):
    model.eval()
    with torch.no_grad():
        image_tensor = image_tensor.to(device)
        predictions = model(image_tensor)

    return predictions

# 3. バウンディングボックスを描画
def draw_boxes(image, predictions, threshold=0.5):
    draw = ImageDraw.Draw(image)
    best_score = 0.0
    best_box = None
    for score, label, box in zip(predictions['scores'], predictions['labels'], predictions['boxes']):
        if score > threshold and score > best_score:
            best_score = score
            best_box = box

    if best_box is not None:
        best_box = [round(i, 2) for i in best_box.tolist()]  # バウンディングボックスの座標を整数に変換
        draw.rectangle(best_box, outline="red", width=3)
    
    return image

# 画像ファイルの読み込み
i1 = 'bananas_images_test/b10.JPG'
i2 = 'bananas_images_test/b25.JPG'
i3 = 'bananas_images_test/b32.JPG'
i4 = 'bananas_images_test/b34.JPG'   #no banana
i5 = 'bananas_images_test/b4.JPG'  #  yes banana
i6 = 'bananas_images_test/b48.JPG' #yes banana
i7 = 'bananas_images_test/b57.JPG'  #yes banana
i8 = 'bananas_images_test/b64.JPG'    #no banana
i9 = 'bananas_images_test/b66.JPG'   #yes banana
i10 = 'bananas_images_test/b8.JPG'    #yes banana

image_list=[i1,i2,i3,i4,i5,i6,i7,i8,i9,i10]

for image_path in image_list:
    image = Image.open(image_path)
    # 画像の前処理
    image_tensor = preprocess_image(image)
    # モデルに画像を渡して推論
    predictions = predict(model, image_tensor)
    # バウンディングボックスを描画
    result_image = draw_boxes(image.copy(), predictions[0], threshold=0.5)
    # 結果の画像を表示
    result_image.show()

# モデルを保存する

In [115]:
import pickle

with open('bananaDetect2.pkl','wb') as f:
    pickle.dump(model,f)

# 次に、作成したmodelの評価をvalデータで行う。

In [116]:
# CSVファイルの読み込み
annotations_val = pd.read_csv("bananas_val_ano.csv", converters={"region_shape_attributes": eval})
annotations_val.head()

Unnamed: 0,filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes
0,b4.JPG,1633612,{},1,0,"{'name': 'polygon', 'all_points_x': [1272, 102...","{""banana"":""banana""}"
1,b8.JPG,1540167,{},1,0,"{'name': 'polygon', 'all_points_x': [1148, 938...",{}
2,b10.JPG,1545982,{},0,0,{},{}
3,b25.JPG,1851131,{},1,0,"{'name': 'polygon', 'all_points_x': [1191, 901...","{""banana"":""""}"
4,b32.JPG,1675185,{},1,0,"{'name': 'polygon', 'all_points_x': [911, 744,...","{""banana"":""banana""}"


In [117]:
image_files_val = annotations_val["filename"]
region_raw_data_val=annotations_val["region_shape_attributes"]

# バウンディングボックス情報の取得
bounding_boxes_val = []
for shape_attr in region_raw_data_val:
    if shape_attr:  # {}空でない場合のみ処理を行う
        all_points_x = shape_attr["all_points_x"]  # キーが存在しない場合は空のリストを返す
        all_points_y = shape_attr["all_points_y"]  # キーが存在しない場合は空のリストを返す
        bounding_boxes_val.append((all_points_x, all_points_y))
    else:
        bounding_boxes_val.append(([0], [0]))  # 欠損値の場合は左のようなデフォルト値のリストを追加

        
# x, yの最小値、最大値を格納したバウンディングボックスを作成する
bounding_boxes_minmax_val = []

for bounding_box in bounding_boxes_val:
    box_x = bounding_box[0]
    box_y = bounding_box[1]

    # バウンディングボックスの形式に変換 [xmin, ymin, xmax, ymax]
    xmin = min(box_x)
    ymin = min(box_y)
    xmax = max(box_x)
    ymax = max(box_y)
    
    if xmin == 0 and ymin == 0 and xmax == 0 and ymax == 0:
        bounding_boxes_minmax_val.append([1, 1, 2, 2]) #set bounding as defult
    else:
        bounding_boxes_minmax_val.append([xmin, ymin, xmax, ymax])
        
# バナナの存在を示す labels リストを作成
labels_val = []

for bbox in bounding_boxes_minmax_val:
    # バウンディングボックスが存在しない場合（[0, 0, 0, 0]の場合）、labelを0に設定
    if bbox == [1, 1, 2, 2]:
        labels_val.append(0)
    else:
        labels_val.append(1)
        
        
print(bounding_boxes_minmax_val)
print(labels_val)

[[1024, 965, 3450, 2032], [868, 367, 3601, 1331], [1, 1, 2, 2], [696, 1034, 1754, 3828], [744, 1089, 3099, 2000], [1, 1, 2, 2], [1252, 1179, 2431, 2885], [1207, 911, 2938, 2232], [1, 1, 2, 2], [448, 1500, 2220, 2389]]
[1, 1, 0, 1, 1, 0, 1, 1, 0, 1]


In [118]:
# バナナ画像が格納されているディレクトリ
data_dir_val = "bananas_images_test"
# 画像ファイル名をフルパスに変換するヘルパー関数
def get_image_path_val(filename):
    return data_dir_val+'/'+filename


# データセットの作成
transform = transforms.Compose([transforms.ToTensor()])
dataset_val = BananaDataset([get_image_path_val(filename) for filename in image_files_val], bounding_boxes_minmax_val, labels_val,transform)


# データローダーの作成
data_loader_val = DataLoader(dataset_val, batch_size=1, shuffle=True)


In [119]:
for image, box,label in data_loader_val:
    print(image)
    print(box)
    print(label)
    print("-------")

tensor([[[[0.5255, 0.5294, 0.5216,  ..., 0.5412, 0.5490, 0.5569],
          [0.5137, 0.5137, 0.5137,  ..., 0.5373, 0.5412, 0.5412],
          [0.5098, 0.5059, 0.5059,  ..., 0.5412, 0.5412, 0.5373],
          ...,
          [0.0588, 0.0588, 0.0588,  ..., 0.0157, 0.0118, 0.0078],
          [0.0588, 0.0588, 0.0549,  ..., 0.0118, 0.0078, 0.0039],
          [0.0627, 0.0588, 0.0510,  ..., 0.0118, 0.0039, 0.0000]],

         [[0.4784, 0.4824, 0.4863,  ..., 0.5137, 0.5216, 0.5294],
          [0.4784, 0.4784, 0.4784,  ..., 0.5098, 0.5137, 0.5137],
          [0.4745, 0.4706, 0.4706,  ..., 0.5137, 0.5137, 0.5098],
          ...,
          [0.0706, 0.0706, 0.0706,  ..., 0.0157, 0.0118, 0.0078],
          [0.0706, 0.0706, 0.0667,  ..., 0.0118, 0.0078, 0.0039],
          [0.0745, 0.0706, 0.0627,  ..., 0.0118, 0.0039, 0.0000]],

         [[0.4157, 0.4196, 0.4196,  ..., 0.4510, 0.4588, 0.4667],
          [0.4118, 0.4118, 0.4118,  ..., 0.4471, 0.4510, 0.4510],
          [0.4078, 0.4039, 0.4039,  ..., 0

tensor([[[[0.6039, 0.6078, 0.6157,  ..., 0.6039, 0.6078, 0.6118],
          [0.6000, 0.6039, 0.6118,  ..., 0.6039, 0.6039, 0.6078],
          [0.5961, 0.6000, 0.6078,  ..., 0.6000, 0.6000, 0.6000],
          ...,
          [0.0157, 0.0157, 0.0157,  ..., 0.0118, 0.0118, 0.0118],
          [0.0118, 0.0118, 0.0118,  ..., 0.0118, 0.0118, 0.0118],
          [0.0118, 0.0078, 0.0078,  ..., 0.0078, 0.0078, 0.0118]],

         [[0.5569, 0.5608, 0.5686,  ..., 0.5725, 0.5765, 0.5804],
          [0.5529, 0.5569, 0.5647,  ..., 0.5725, 0.5725, 0.5765],
          [0.5490, 0.5529, 0.5608,  ..., 0.5686, 0.5686, 0.5686],
          ...,
          [0.0157, 0.0157, 0.0157,  ..., 0.0118, 0.0118, 0.0118],
          [0.0118, 0.0118, 0.0118,  ..., 0.0118, 0.0118, 0.0118],
          [0.0118, 0.0078, 0.0078,  ..., 0.0078, 0.0078, 0.0118]],

         [[0.5020, 0.5059, 0.5137,  ..., 0.5294, 0.5333, 0.5373],
          [0.4980, 0.5020, 0.5098,  ..., 0.5216, 0.5294, 0.5333],
          [0.4941, 0.4980, 0.5059,  ..., 0

# modelにテスト用データを渡して予測性能を評価↓

In [120]:
model.eval()  # モデルを評価モードに切り替える
predictions = []  # 予測結果を格納するリスト

with torch.no_grad():
    for image, boxes,label in data_loader_val:
        image = image.to(device)  # GPUへデータを移動（必要に応じて）
        outputs = model(image)  # モデルから予測を取得
        predictions.append(outputs)
        print(outputs)

# with torch.no_grad():
#     for idx in range(len(dataset_val)):
#         image, boxes, actual_labels = dataset_val[idx]

#         # モデルから予測を取得
#         image = image.to(device)  # GPUへデータを移動（必要に応じて）
#         outputs = model([image])  # バッチサイズ1のリストとして渡す

#         # 予測結果と実際のデータを比較し、性能評価を行う
#         predicted_boxes = outputs[0]['boxes']
#         predicted_labels = outputs[0]['labels']

#         # 予測されたバウンディングボックスとクラスラベルを実際のデータと比較して評価
#         iou_values = calculate_iou(predicted_boxes, boxes)
#         accuracy = calculate_accuracy(predicted_labels, actual_labels)

#         # IOUの最大値を取得
#         max_iou = torch.max(iou_values).item()

#         # 合計のIOUと正確さを計算
#         total_iou += max_iou  # 最大のIOUを使用
#         total_accuracy += accuracy



[{'boxes': tensor([[1280.4034, 1528.4061, 3709.4602, 2961.0693],
        [ 721.3444,  255.6906, 3916.0542, 2896.4595],
        [2124.0894, 1101.7996, 3975.0132, 2945.7639],
        [ 216.1555,    0.0000, 3988.6458, 2939.8350],
        [ 248.2514,   74.5624, 4012.6411, 3024.0000]]), 'labels': tensor([ 1,  1,  1, 67, 65]), 'scores': tensor([0.2080, 0.1431, 0.1048, 0.0670, 0.0530])}]
[{'boxes': tensor([[1039.5994, 1073.0049, 3780.2773, 2440.9766],
        [ 809.2783,  802.4148, 2550.4995, 2773.3164],
        [ 938.8151, 1055.5380, 1874.4094, 2900.4160],
        [ 604.0094,  419.1537, 3805.0305, 2136.6570],
        [1015.3345, 1413.9248, 2853.5449, 2653.4700],
        [1834.8743, 1425.2382, 3818.0781, 2871.6440],
        [1732.4094, 1278.0569, 2773.4797, 2849.2671]]), 'labels': tensor([1, 1, 1, 1, 1, 1, 1]), 'scores': tensor([0.9730, 0.7914, 0.1750, 0.1323, 0.1286, 0.0797, 0.0528])}]
[{'boxes': tensor([[1557.8922, 1210.4463, 2419.8157, 2529.0273],
        [1067.9637,  649.0818, 2413.7224, 

In [121]:
# threshold = 0.5  # 信頼度スコアの閾値

# # 各画像に対して処理
# for image_predictions in predictions:
#     selected_predictions = []  # 信頼度スコアが閾値を超える予測を格納するリスト

#     for prediction in image_predictions:
#         # 予測の信頼度スコアを取得
#         scores = prediction['scores']

#         if scores.numel() > 0:
#             score = scores[0].item()  # scoresが空でない場合、最初のスコアを取得

#             # 信頼度スコアが閾値を超える場合、またはバウンディングボックスが存在しない場合に予測を選択リストに追加
#             if score > threshold or len(prediction['boxes']) == 0:
#                 selected_predictions.append(prediction)

#     # 選択した予測バウンディングボックスの情報を取得
#     selected_boxes = [prediction['boxes'] for prediction in selected_predictions]
#     selected_labels = [prediction['labels'] for prediction in selected_predictions]

#     # ここで選択したバウンディングボックスやラベルを用いて評価や後続の処理を行います


In [122]:
from sklearn.metrics import average_precision_score
import numpy as np

# 各クラスごとに平均適合率を計算
average_precisions = []

for class_id in range(num_classes):
    y_true_class = [1 if label == class_id else 0 for labels in actual_labels]
    y_scores_class = [-prediction[class_id] if class_id in prediction else 0 for prediction in predicted_labels]
    average_precision = average_precision_score(y_true_class, y_scores_class)
    average_precisions.append(average_precision)

# 各クラスの平均適合率を計算
mAP = np.mean(average_precisions)

# mAP を出力
print(f'mAP: {mAP}')

NameError: name 'num_classes' is not defined

In [None]:

# モデルの予測と実際のデータを格納するリスト
predicted_boxes = []
predicted_labels = []
actual_boxes = []
actual_labels = []

# 各画像に対して処理
for image_predictions, (image, boxes, labels) in zip(predictions, data_loader_val):
    selected_predictions = []  # 信頼度スコアが閾値を超える予測を格納するリスト

    for prediction in image_predictions:
        # 予測の信頼度スコアを取得
        scores = prediction['scores']

        if scores.numel() > 0:
            score = scores[0].item()  # scoresが空でない場合、最初のスコアを取得

            # 信頼度スコアが閾値を超える場合、またはバウンディングボックスが存在しない場合に予測を選択リストに追加
            if score > threshold or len(prediction['boxes']) == 0:
                selected_predictions.append(prediction)

    # 選択した予測バウンディングボックスの情報を取得
    selected_boxes = [prediction['boxes'] for prediction in selected_predictions]
    selected_labels = [prediction['labels'] for prediction in selected_predictions]

    # 実際のデータをリストに追加
    actual_boxes.append(boxes)
    actual_labels.append(labels)

    # モデルの予測をリストに追加
    predicted_boxes.append(selected_boxes)
    predicted_labels.append(selected_labels)

# 予測と実際のデータを評価
# 例: 平均適合率 (Average Precision) の計算
average_precision = average_precision_score(actual_labels, predicted_labels)

# その他の評価メトリクスを計算できます
# 例: IoU (Intersection over Union) の計算

# 最終的な評価メトリクスを出力
print(f'Average Precision: {average_precision}')
