In [1]:
import os
import pandas as pd
import numpy as np
def filePathsGen(rootPath):
    """此函數將rootPath資料夾目錄中的所有圖片路徑資訊儲存至一個清單內。"""
    paths = []
    # dirs = []
    for dirPath, dirNames, fileNames in os.walk(rootPath):
        for fileName in fileNames:
            # print(fileName)
            # category = dirPath.split("\\")[-1]  # (Windows)
            fullPath = os.path.join(dirPath, fileName)
            paths.append((fileName, fullPath))
    return paths

In [2]:
train_paths = filePathsGen("..\\data\\versions\\1\\train_data\\train-old")  # 載入圖片路徑 (Windows)
test_paths = filePathsGen("..\\data\\versions\\1\\test_data\\test_data")  # 載入圖片路徑 (Windows)
df_train_image = pd.DataFrame(train_paths, columns=["image","path"]) # 圖片路徑存成Pandas資料表
df_test_image = pd.DataFrame(test_paths, columns=["image","path"]) # 圖片路徑存成Pandas資料表
df_train_info = pd.read_json("..\\data\\versions\\1\\train.json")
df_test_info = pd.read_json("..\\data\\versions\\1\\test.json")
df_train = pd.merge(df_train_image, df_train_info, left_on='image', right_on='file_name', how='inner').drop(columns=['file_name'])
df_test = pd.merge(df_test_image, df_test_info, left_on='image', right_on='file_name', how='inner').drop(columns=['file_name'])

In [3]:
# 檢查 boxes 中是否有相同的值並打印 index 和病灶名
for index in range(len(df_train)):  # 假設 df_train 中有多行
    syms = df_train.loc[index]["syms"]  # 獲取 syms 列
    boxes = df_train.loc[index]["boxes"]  # 獲取 boxes 列

    # 找到重複的 boxes
    unique_boxes = []
    duplicate_found = False
    for i, box in enumerate(boxes):
        if box in unique_boxes:
            # 找到重複的病灶名
            duplicate_index = unique_boxes.index(box)
            print(f"Duplicate box found at index {index}: {box}")
            print(f"Corresponding lesions: {syms[duplicate_index]} and {syms[i]}")
            duplicate_found = True
        else:
            unique_boxes.append(box)
    
    # if not duplicate_found:
    #     print(f"No duplicates at index {index}")

# 創建 syms 與 boxes 的映射字典
result_dict_list = []  # 用於存放每個樣本的字典

for index in range(len(df_train)):  # 假設 df_train 中有多行
    syms = df_train.loc[index]["syms"]  # 獲取 syms 列
    boxes = df_train.loc[index]["boxes"]  # 獲取 boxes 列

    # 確保 syms 和 boxes 的長度一致
    if len(syms) != len(boxes):
        print(f"Warning: Mismatch in syms and boxes length at index {index}")
        continue

    # 創建當前樣本的字典
    sample_dict = {syms[j]: boxes[j] for j in range(len(syms))}
    result_dict_list.append(sample_dict)

Duplicate box found at index 85: [243, 662, 407, 886]
Corresponding lesions: Fibrosis and Consolidation
Duplicate box found at index 115: [621, 267, 741, 401]
Corresponding lesions: Fibrosis and Fracture
Duplicate box found at index 131: [620, 362, 798, 532]
Corresponding lesions: Mass and Nodule
Duplicate box found at index 205: [160, 650, 469, 912]
Corresponding lesions: Fibrosis and Effusion
Duplicate box found at index 719: [128, 378, 390, 685]
Corresponding lesions: Atelectasis and Effusion
Duplicate box found at index 1647: [108, 645, 175, 727]
Corresponding lesions: Calcification and Nodule
Duplicate box found at index 1746: [689, 420, 863, 552]
Corresponding lesions: Fracture and Fracture
Duplicate box found at index 1929: [319, 167, 370, 291]
Corresponding lesions: Fibrosis and Fracture
Duplicate box found at index 2185: [268, 106, 479, 320]
Corresponding lesions: Fibrosis and Fracture
Duplicate box found at index 2185: [601, 121, 757, 319]
Corresponding lesions: Fracture and 

In [4]:
df_train.head(20)

Unnamed: 0,image,path,syms,boxes
0,36200.png,..\data\versions\1\train_data\train-old\36200.png,[],[]
1,36201.png,..\data\versions\1\train_data\train-old\36201.png,[],[]
2,36202.png,..\data\versions\1\train_data\train-old\36202.png,[],[]
3,36203.png,..\data\versions\1\train_data\train-old\36203.png,[],[]
4,36204.png,..\data\versions\1\train_data\train-old\36204.png,[Nodule],"[[233, 666, 258, 693]]"
5,36205.png,..\data\versions\1\train_data\train-old\36205.png,"[Nodule, Nodule, Nodule, Nodule, Nodule, Nodul...","[[884, 223, 957, 299], [414, 208, 499, 267], [..."
6,36206.png,..\data\versions\1\train_data\train-old\36206.png,"[Nodule, Nodule, Consolidation]","[[209, 594, 248, 643], [771, 508, 815, 568], [..."
7,36209.png,..\data\versions\1\train_data\train-old\36209.png,"[Effusion, Effusion, Effusion]","[[58, 515, 208, 659], [18, 605, 359, 802], [83..."
8,36210.png,..\data\versions\1\train_data\train-old\36210.png,[Nodule],"[[240, 303, 286, 356]]"
9,36216.png,..\data\versions\1\train_data\train-old\36216.png,"[Consolidation, Effusion]","[[224, 511, 445, 800], [127, 739, 452, 904]]"


In [5]:
# 建立疾病名稱到代號的映射字典
disease_names = set([disease for sublist in df_train["syms"] for disease in sublist])
disease_to_id = {disease: idx for idx, disease in enumerate(disease_names)}
print("Disease to ID mapping:", disease_to_id)

df_train["syms"] = df_train["syms"].apply(lambda x: [disease_to_id[disease] for disease in x])
df_test["syms"] = df_test["syms"].apply(lambda x: [disease_to_id[disease] for disease in x])





Disease to ID mapping: {'Consolidation': 0, 'Mass': 1, 'Emphysema': 2, 'Fibrosis': 3, 'Effusion': 4, 'Pneumothorax': 5, 'Nodule': 6, 'Atelectasis': 7, 'Calcification': 8, 'Fracture': 9}


In [6]:
df_train

Unnamed: 0,image,path,syms,boxes
0,36200.png,..\data\versions\1\train_data\train-old\36200.png,[],[]
1,36201.png,..\data\versions\1\train_data\train-old\36201.png,[],[]
2,36202.png,..\data\versions\1\train_data\train-old\36202.png,[],[]
3,36203.png,..\data\versions\1\train_data\train-old\36203.png,[],[]
4,36204.png,..\data\versions\1\train_data\train-old\36204.png,[6],"[[233, 666, 258, 693]]"
...,...,...,...,...
2996,70948.png,..\data\versions\1\train_data\train-old\70948.png,"[0, 4, 0, 4]","[[521, 441, 829, 945], [521, 441, 829, 945], [..."
2997,70949.png,..\data\versions\1\train_data\train-old\70949.png,"[0, 4, 0, 4]","[[559, 396, 847, 880], [559, 396, 847, 880], [..."
2998,70950.png,..\data\versions\1\train_data\train-old\70950.png,"[0, 8, 4]","[[637, 510, 824, 672], [748, 682, 771, 722], [..."
2999,70954.png,..\data\versions\1\train_data\train-old\70954.png,"[0, 0]","[[603, 111, 858, 695], [254, 162, 479, 682]]"


In [7]:
from sklearn.model_selection import train_test_split

# split dataset into train and val
train, val = train_test_split(
    df_train, 
    test_size=0.2,   # 設定 val 集比例
    random_state=25  # 固定隨機種子，確保可重現
)

In [8]:
import numpy as np
import cv2
import torch
from torch.utils.data import TensorDataset, DataLoader

def dataLoad(dfPath):
    """
    加載數據並處理圖像，同時生成 Faster R-CNN 的 `targets` 格式。
    
    Args:
        dfPath (pd.DataFrame): 包含圖像路徑和標籤的 DataFrame。

    Returns:
        tuple: 處理後的圖像數據 (x) 和目標數據 (targets)。
    """
    # 提取文件路徑
    paths = dfPath["path"].values
    images = []  # 存儲處理後的圖像
    targets = []  # 存儲目標數據

    for j in range(len(paths)):
        # 加載圖像
        syms = dfPath.iloc[j]["syms"]  # 獲取疾病類別
        boxes = dfPath.iloc[j]["boxes"]  # 獲取邊界框

        # 過濾沒有邊界框的樣本
        if len(boxes) == 0:
            print(f"Skipping index {j}: No bounding boxes found.")
            continue
        img = cv2.imread(paths[j], cv2.IMREAD_GRAYSCALE)  # 加載為灰度圖像
        
        # 調整大小
        img_resized = cv2.resize(img, (128, 128))  # 使用 OpenCV 進行縮放
        
        # 標準化圖像到 [0, 1] 範圍
        img_resized = img_resized / 255.0
        images.append(img_resized)

        # 獲取疾病類別和邊界框
        

        # 構建 Faster R-CNN 的 `targets` 格式
        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32),  # 邊界框
            "labels": torch.tensor(syms, dtype=torch.int64)      # 疾病類別
        }
        targets.append(target)

    # 將圖像數據轉換為 PyTorch 張量
    x = torch.tensor(np.array(images, dtype=np.float32)).unsqueeze(1)  # 添加通道維度

    return x, targets

# 修改後的加載數據
x_train, y_train = dataLoad(train)
x_val, y_val = dataLoad(val)



print(f"Train data shape: {x_train.shape}, Train targets count: {len(y_train)}")
print(f"Validation data shape: {x_val.shape}, Validation targets count: {len(y_val)}")


Skipping index 16: No bounding boxes found.
Skipping index 18: No bounding boxes found.
Skipping index 23: No bounding boxes found.
Skipping index 26: No bounding boxes found.
Skipping index 39: No bounding boxes found.
Skipping index 40: No bounding boxes found.
Skipping index 45: No bounding boxes found.
Skipping index 50: No bounding boxes found.
Skipping index 60: No bounding boxes found.
Skipping index 69: No bounding boxes found.
Skipping index 70: No bounding boxes found.
Skipping index 72: No bounding boxes found.
Skipping index 74: No bounding boxes found.
Skipping index 75: No bounding boxes found.
Skipping index 77: No bounding boxes found.
Skipping index 81: No bounding boxes found.
Skipping index 87: No bounding boxes found.
Skipping index 88: No bounding boxes found.
Skipping index 92: No bounding boxes found.
Skipping index 99: No bounding boxes found.
Skipping index 104: No bounding boxes found.
Skipping index 114: No bounding boxes found.
Skipping index 115: No boundin

In [9]:
print(f"Number of training images: {len(x_train)}")
print(f"Number of training targets: {len(y_train)}")
print(f"Number of validation images: {len(x_val)}")
print(f"Number of validation targets: {len(y_val)}")


Number of training images: 1873
Number of training targets: 1873
Number of validation images: 447
Number of validation targets: 447


In [10]:
from torch.utils.data import Dataset

# def collate_fn(batch):
#     images, targets = zip(*batch)
    
#     # 将图像列表转换为张量，并移动到指定设备
#     images = [img if isinstance(img, torch.Tensor) else torch.tensor(img, dtype=torch.float32) for img in images]
    
#     # 将目标列表中的每个字典的值转换为张量，并移动到指定设备
#     targets = [{k: v if isinstance(v, torch.Tensor) else torch.tensor(v) for k, v in t.items()} for t in targets]
    
#     return images, targets
def collate_fn(batch):
    images, targets = zip(*batch)
    return list(images), list(targets)
class CustomDataset(Dataset):
    def __init__(self, images, targets):
        """
        自定義數據集，用於 Faster R-CNN 的訓練。
        Args:
            images (torch.Tensor): 圖像數據，形狀為 (N, C, H, W)。
            targets (list): 目標數據，每個元素是包含 `boxes` 和 `labels` 的字典。
        """
        self.images = images
        self.targets = targets

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return self.images[idx], self.targets[idx]


In [None]:
# 創建 Dataset
dataset_train = CustomDataset(x_train, y_train)
dataset_val = CustomDataset(x_val, y_val)

# 創建 DataLoader
dl_train = DataLoader(dataset_train, batch_size=32, shuffle=True, collate_fn=collate_fn)
dl_val = DataLoader(dataset_val, batch_size=32, shuffle=False, collate_fn=collate_fn)




In [12]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision
# from torchmetrics.classification import MulticlassAccuracy
class SimpleTrainer:
    def __init__(self, model, optimizer, device="cuda:0"):
        self.model = model.to(device)
        self.optimizer = optimizer
        self.device = device
        self.iou_metric = MeanAveragePrecision(box_format='xyxy', iou_thresholds=[0.5]).to(device)
        # self.classification_metric = MulticlassAccuracy(num_classes=10, average='weighted').to(device)
    
    def train_step(self, dataloader):
        
        train_loss = 0
        # 清空 metrics
        self.iou_metric.reset()
        # self.classification_metric.reset()

        for batch_x, batch_y in dataloader:
            self.model.train()
            batch_x = [img.to(self.device) for img in batch_x]
            batch_y = [{k: v.to(self.device) for k, v in t.items()} for t in batch_y]
            
            # 正向传播计算损失
            losses = self.model(batch_x, batch_y)
            loss_value = sum(loss for loss in losses.values())
            train_loss += loss_value.item()

            # 反向传播和优化
            self.optimizer.zero_grad()
            loss_value.backward()
            self.optimizer.step()

            # 计算分类准确率和 IoU
            with torch.no_grad():
                # print("evaluation!")
                self.model.eval()
                pred_y = self.model(batch_x)
                # for i, pred in enumerate(pred_y):
                #     print(f"Checking pred_y[{i}]")
                #     check_dict_values(pred)
                # print("predict done!")
                
                # print("pred_y: ",pred_y)
                # # print("batch_x",batch_x.shape)
                # # print(f"pred_y type: {type(pred_y)}")
                # print(f"pred_y boxes: {pred_y[0]['boxes'].shape}")
                # print(f"pred_y labels: {pred_y[0]['labels'].shape}")
                # print(f"pred_y score: {pred_y[0]['scores'].shape}")
                # print(f"pred_y boxes: {pred_y[0]['boxes']}")
                # print(f"pred_y labels: {pred_y[0]['labels']}")
                # print(f"pred_y score: {pred_y[0]['scores']}")
                # # print(f"batch_x type: {type(batch_x)}")
                # # print(f"batch_x content: {batch_x}")
                # pred_y = [
                #     {k: v.to(self.device) if isinstance(v, torch.Tensor) else torch.tensor(v).to(self.device) for k, v in t.items()}
                #     for t in pred_y
                # ]
                # print("after!!!!")
                # print(f"pred_y boxes: {pred_y[0]['boxes'].shape}")
                # print(f"pred_y labels: {pred_y[0]['labels'].shape}")
                # print(f"pred_y score: {pred_y[0]['scores'].shape}")
                # print(f"pred_y boxes: {pred_y[0]['boxes']}")
                # print(f"pred_y labels: {pred_y[0]['labels']}")
                # print(f"pred_y score: {pred_y[0]['scores']}")
                self.iou_metric.update(pred_y, batch_y)
                # print("iou_metric done!")
                # pred_labels = torch.cat([p["labels"] for p in pred_y], dim=0)
                # true_labels = torch.cat([t["labels"] for t in batch_y], dim=0)
                # self.classification_metric.update(pred_labels, true_labels)
                # break
                
        
        # 计算平均损失和准确率
        train_loss /= len(dataloader.dataset)
        # classification_accuracy = self.classification_metric.compute()
        iou_accuracy = self.iou_metric.compute()
        
        print(f"train_loss={train_loss:.4f}, mAP={iou_accuracy['map']:.4f}")
        return train_loss, iou_accuracy['map']
    
    def test_step(self, dataloader):
        """
        执行模型的测试评估
        """
        self.model.eval()
        test_loss = 0

        # 清空 metrics
        self.iou_metric.reset()
        # self.classification_metric.reset()
        print('test start')
        with torch.no_grad():
            for batch_x, batch_y in dataloader:
                batch_x = [img.to(self.device) for img in batch_x]
                batch_y = [{k: v.to(self.device) for k, v in t.items()} for t in batch_y]

                # 预测结果
                predictions = self.model(batch_x)
                
                # 计算损失（如果需要测试阶段的损失值）
                if len(batch_y) > 0:
                    losses = self.model(batch_x, batch_y)
                    loss_value = sum(loss for loss in losses.values())
                    test_loss += loss_value.item()

                # 更新 metrics
                self.iou_metric.update(predictions, batch_y)
                # pred_labels = torch.cat([p["labels"] for p in predictions], dim=0)
                # true_labels = torch.cat([t["labels"] for t in batch_y], dim=0)
                # self.classification_metric.update(pred_labels, true_labels)

        # 计算平均测试损失
        test_loss /= len(dataloader.dataset)

        # 计算准确率
        # classification_accuracy = self.classification_metric.compute()
        iou_accuracy = self.iou_metric.compute()

        print(f"test_loss={test_loss:.4f}, mAP={iou_accuracy['map']:.4f}")
        return test_loss, iou_accuracy['map']
    
    def fit(self, dl_train, dl_test, num_epochs):
        self.metrics = {
            "train_loss": [],
            "test_loss": [],
            "train_classification_acc": [],
            "test_classification_acc": [],
            "train_map": [],
            "test_map": []
        }
        for epoch in range(num_epochs):
            print(f"Epoch {epoch+1}/{num_epochs}")
            train_loss, train_map  = self.train_step(dl_train)
            # print('train done!')
            test_loss, test_map = self.test_step(dl_test)
            self.metrics["train_loss"].append(train_loss)
            self.metrics["test_loss"].append(test_loss)
            # self.metrics["train_classification_acc"].append(train_classification_acc)
            # self.metrics["test_classification_acc"].append(test_classification_acc)
            self.metrics["train_map"].append(train_map)
            self.metrics["test_map"].append(test_map)
        
    def graph_map(self):
        import matplotlib.pyplot as plt
        plt.plot(self.metrics["train_map"], label="train_map")
        plt.plot(self.metrics["test_map"], label="test_map")
        plt.legend()
        plt.show()
        
    # def graph_class_acc(self):
    #     import matplotlib.pyplot as plt
    #     plt.plot(self.metrics["train_classification_acc"], label="train_classification_acc")
    #     plt.plot(self.metrics["test_classification_acc"], label="test_classification_acc")
    #     plt.legend()
    #     plt.show()
        
    def graph_loss(self):
        import matplotlib.pyplot as plt
        plt.plot(self.metrics["train_loss"], label="train_loss")
        plt.plot(self.metrics["test_loss"], label="test_loss")
        plt.legend()
        plt.show()

In [14]:
print(f"Number of training images: {len(x_train)}")
len(dl_train.dataset)

Number of training images: 1873


1873

In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# 初始化 Faster R-CNN 模型
model = fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()

# 獲取原始分類頭的輸入特徵數量
num_classes = 10  # 包括背景類別
in_features = model.roi_heads.box_predictor.cls_score.in_features

# 替換為新的分類頭
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model = model.to("cuda:0")
# 測試 DataLoader
for images, targets in dl_train:
    # 將圖像移動到 GPU
    img = [image.to("cuda:0") for image in images]
    
    # 模型推理
    with torch.no_grad():  # 禁用梯度計算
        predictions = model(img)
    
    # 打印結果（僅供測試）
    print(predictions)

    # 清除變數，釋放 CUDA 記憶體
    del img, predictions
    torch.cuda.empty_cache()  # 釋放未使用的記憶體
    break

In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torch.optim import Adam
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
# 初始化优化器

# 初始化 Faster R-CNN 模型
frcnn_model = fasterrcnn_resnet50_fpn(pretrained=True)

num_classes = 10  # 包括背景類別
in_features = frcnn_model.roi_heads.box_predictor.cls_score.in_features

# 替換為新的分類頭
frcnn_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
frcnn_model = frcnn_model.to("cuda:0")
num_epochs = 50
learning_rate = 1e-4
optimizer = Adam(frcnn_model.parameters(), lr=learning_rate)
model = SimpleTrainer(frcnn_model, optimizer)
metric = model.fit(dl_train, dl_val,num_epochs)
# # 測試 DataLoader
# # for images, targets in dl_train:
# #     # 前向傳播
# #     loss = model(images, targets)
# #     loss_value = sum(sep_loss for sep_loss in loss.values())
# #     print(loss_value)
# #     break



Epoch 1/50


In [None]:
import torch

if torch.cuda.is_available():
    print("CUDA is available!")
    print("GPU Device:", torch.cuda.get_device_name(0))
else:
    print("CUDA is not available. Using CPU only.")


CUDA is available!
GPU Device: NVIDIA GeForce GTX 1080
