# 這份檔案會做以下部分

1. X-ray image normalization
2. mask image 轉為 bounding box
3. 資料集轉換為 COCO format
4. 將資料存為 .jpg & .json

---

# Rename files & Save .csv as UTF-8 encoding format

1. 上傳資料前先將資料夾名稱換成英文:

* normal -> normal
* 心臟肥大 -> cardiac_hypertrophy
* 主動脈硬鈣化 -> aortic_atherosclerosis_calcification
* 主動脈彎曲 -> aortic_curvature
* 肺尖肋膜增厚 -> intercostal_pleural_thickening
* 肺野浸潤增加 -> lung_field_infiltration
* 胸椎退化性關節病變 -> degenerative_joint_disease_of_the_thoracic_spine
* 脊椎側彎 -> scoliosis



---

# Check data & images

In [None]:
!pip install pydicom



In [None]:
!pip install scikit-multilearn




In [None]:
# import libraries

# basic
import warnings
warnings.filterwarnings('ignore')

import os
import random
import pydicom
import itertools
import numpy as np
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from skmultilearn.model_selection import iterative_train_test_split

# visualization
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.gridspec as gridspec

# object detection
import json
from skimage.measure import label as sk_label
from skimage.measure import regionprops as sk_regions

In [None]:
# Mount Google Drive so the dataset folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Dataset folder on the mounted drive; listing it shows which files are present.
data_path = '/content/drive/My Drive/hwk05_data'
print("目錄內容：", os.listdir(data_path))  # check that the files exist

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
目錄內容： ['README.pdf', 'train.csv', 'test.csv', 'test', 'train', 'val.json', 'train.json']


In [None]:
class config:
    """Notebook-wide settings shared by the cells below."""
    # Dataset root on the mounted Google Drive; image and mark paths are joined onto it.
    root = "/content/drive/MyDrive/hwk05_data"
    # Seed handed to seed_everything().
    seed = 42

In [None]:
def seed_everything(seed):
    """Seed the global Python ``random`` and NumPy random generators with ``seed``."""
    # Python's generator first, then NumPy's legacy global generator.
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

seed_everything(config.seed)

In [None]:
# training dataframe, read from the dataset root configured in `config`
train_csv_path = os.path.join(config.root, "train.csv")
train_df = pd.read_csv(train_csv_path)
train_df

In [None]:
# Confirm the grouping result after the update: one line per category with its row count.
grouped_after_update = train_df.groupby("category")
for name, size in grouped_after_update.size().items():
    print(f"Group: {name}, Size: {size}")


Group: aortic_atherosclerosis_calcification, Size: 71
Group: aortic_curvature, Size: 52
Group: cardiac_hypertrophy, Size: 33
Group: degenerative_joint_disease_of_the_thoracic_spine, Size: 60
Group: intercostal_pleural_thickening, Size: 29
Group: lung_field_infiltration, Size: 68
Group: normal, Size: 80
Group: scoliosis, Size: 58


## 畫出8種類別的第一張 image & mask 位置

這裡如果類別是 normal，就自行生成一個 shape 和原始影像相同的 mask。

In [None]:
# Keep only the first row of every category.
is_repeat = train_df["category"].duplicated()
temp = train_df[~is_repeat]
temp

Unnamed: 0,ID,category,Width,Height,Filename,ImagePath,MarkPath
0,TDR04_20180315_075734,normal,2328,2344,220_97.dcm,normal/image/220_97.dcm,normal/mark/220_97.dcm.jpg
80,TDR04_20180227_083423,aortic_curvature,2504,2536,220_14.dcm,aortic_curvature/image/220_14.dcm,aortic_curvature/mark/220_14.dcm.jpg
132,TDR01_20190313_090724,aortic_atherosclerosis_calcification,2392,2600,10_1d.dcm,aortic_atherosclerosis_calcification/image/10_...,aortic_atherosclerosis_calcification/mark/10_1...
203,TDR04_20180226_090403,cardiac_hypertrophy,2008,2280,4440.dcm,cardiac_hypertrophy/image/4440.dcm,cardiac_hypertrophy/mark/4440.dcm.jpg
236,TDR05_20151105_094209,intercostal_pleural_thickening,2296,2512,4440_4.dcm,intercostal_pleural_thickening/image/4440_4.dcm,intercostal_pleural_thickening/mark/4440_4.dcm...
265,TDR04_20180227_083423,lung_field_infiltration,2504,2536,220_3.dcm,lung_field_infiltration/image/220_3.dcm,lung_field_infiltration/mark/220_3.dcm.jpg
333,TDR04_20180227_085056,degenerative_joint_disease_of_the_thoracic_spine,2336,2360,220_15.dcm,degenerative_joint_disease_of_the_thoracic_spi...,degenerative_joint_disease_of_the_thoracic_spi...
393,TDR01_20171109_083459,scoliosis,2232,2408,A0_26.dcm,scoliosis/image/A0_26.dcm,scoliosis/mark/A0_26.dcm.jpg


In [None]:
def plot_images_and_marks(df):
    """Plot the first image and its mark for every category found in ``df``.

    The first row of each category is drawn on a 4 x 2 grid of panels. Every
    panel shows the raw DICOM pixel array next to the mark image. Rows whose
    category is "normal" get an all-zero mark with the image's height and
    width instead of a file read.

    Args:
        df: DataFrame with "category", "ImagePath" and "MarkPath" columns.
            Paths are joined onto ``<config.root>/train``.
    """
    first_rows = df[~df["category"].duplicated()]
    train_dir = os.path.join(config.root, "train")

    rows, cols = 4, 2
    fig = plt.figure(figsize=(16, 16))
    grid = plt.GridSpec(rows, cols)

    # Draw at most one panel per grid cell, and never index past the
    # categories that are actually present in df.
    for i in range(min(rows * cols, len(first_rows))):
        row = first_rows.iloc[i]
        category = row["category"]

        image = pydicom.dcmread(os.path.join(train_dir, row["ImagePath"])).pixel_array
        if category != "normal":
            # Context manager closes the file once the pixels are copied out.
            with Image.open(os.path.join(train_dir, row["MarkPath"])) as mark_file:
                mark = np.array(mark_file)
        else:
            mark = np.zeros((image.shape[0], image.shape[1]))

        # Invisible outer axes whose only job is to carry the category title.
        panel = fig.add_subplot(grid[i])
        panel.set_title(f"{category}\n", fontweight='semibold', size=14)
        panel.set_axis_off()

        gs = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=grid[i])

        ax = fig.add_subplot(gs[0])
        ax.imshow(image, cmap="gray")
        ax.set_title("Image")
        ax.axis("off")

        ax = fig.add_subplot(gs[1], sharey=ax)
        ax.imshow(mark, cmap="gray")
        ax.set_title("Mark")
        ax.axis("off")

    fig.patch.set_facecolor('white')
    fig.suptitle("Images and marks of 8 categories\n", fontweight='bold', size=16)
    fig.tight_layout()

plot_images_and_marks(train_df)

---

# X-ray image normalization

完成 **intensity log-transformation** 跟 **simplest color balance algorithm** ，目的是為了轉換影像型態及調整 L & R 字樣的亮度。  
以 ID 為 TDR02_20161209_161439 的病患為例，輸出轉換前及轉換後的 X-ray 影像：

In [None]:
def _first_window_value(value):
    """Return a DICOM window attribute as a float, using the first entry if it is multi-valued."""
    if isinstance(value, (str, bytes)):
        return float(value)
    try:
        first = value[0]
    except (TypeError, IndexError):
        # Plain scalars are not subscriptable.
        first = value
    return float(first)


def X_ray_normalization(dcm_file, vmin, vmax):
    """Read a DICOM X-ray and return its raw, log-transformed and rescaled pixels.

    Processing steps:
      1. Clip the pixel array to the display window
         ``[WindowCenter - WindowWidth / 2, WindowCenter + WindowWidth / 2]``.
      2. Apply ``-log((1 + p) / 2 ** BitsStored)``.
      3. Map ``[vmin, vmax]`` linearly onto ``[0, 1]`` and clip to that range.

    Args:
        dcm_file: DICOM file path handed to ``pydicom.dcmread``.
        vmin: log-intensity that maps to 0 in the normalized image.
        vmax: log-intensity that maps to 1 in the normalized image.

    Returns:
        Tuple ``(origin, log_img, normalize_img)``: the untouched pixel array,
        the log-transformed image and the image rescaled into ``[0, 1]``.
        The two derived images are float64.

    Raises:
        ValueError: if ``vmin`` equals ``vmax`` (the rescaling would divide by zero).
    """
    if vmax == vmin:
        raise ValueError("vmin and vmax must differ")

    img = pydicom.dcmread(dcm_file)
    origin = img.pixel_array

    # Window attributes may hold several values; the first one is used.
    WW = _first_window_value(img.WindowWidth)
    WC = _first_window_value(img.WindowCenter)
    BitsStored = img.BitsStored

    # intensity log-transformation: restrict to the display window first
    lower_bound = WC - WW / 2
    upper_bound = WC + WW / 2

    # Work in float64 so the result dtype does not depend on NumPy's
    # scalar-promotion rules for integer pixel arrays.
    clamped_array = np.clip(origin.astype(np.float64), lower_bound, upper_bound)

    # +1 keeps the logarithm finite for zero-valued pixels.
    log_img = -np.log((1 + clamped_array) / (2 ** BitsStored))

    # Linear contrast stretching between vmin and vmax, clipped to [0, 1].
    normalize_img = np.clip((log_img - vmin) / (vmax - vmin), 0, 1)

    return origin, log_img, normalize_img

In [None]:
def plot_before_and_after(ID, df):
    """Show one patient's X-ray raw, after log-transformation and after normalization.

    Args:
        ID: patient identifier looked up in the "ID" column of ``df``; the
            first matching row is used.
        df: DataFrame with "ID" and "ImagePath" columns. The path is joined
            onto ``<config.root>/train``.

    Raises:
        ValueError: if ``ID`` does not occur in ``df``.
    """
    patient_df = df[df["ID"] == ID]
    if patient_df.empty:
        raise ValueError(f"ID {ID!r} not found in the dataframe")

    path = os.path.join(config.root, "train", patient_df.iloc[0]["ImagePath"])
    origin, log_img, normalize_img = X_ray_normalization(path, vmin=0, vmax=2.5)

    # A single figure holds all three panels; no separate blank figure is created.
    fig, ax = plt.subplots(1, 3)
    for axis in ax:
        axis.axis('off')

    panels = (
        (origin, "Original Image"),
        (log_img, "After Log-transformation"),
        (normalize_img, "After Normalization"),
    )
    for axis, (picture, title) in zip(ax, panels):
        axis.imshow(picture, cmap="gray")
        axis.set_title(title, size=8)

    fig.suptitle(f"{ID}", fontweight='bold', size=10, x=0.52, y=0.77)

plot_before_and_after(ID = "TDR02_20161209_161439", df = train_df)

---

# Mask image to bounding box

這部分將資料集中的 mask 轉換為丟入模型所需的 bounding box，並畫出8種類別的轉換後影像、轉換後影像及 bounding box，以及 mask 影像：

In [None]:
def mask_to_bbox(mark_path, min_area=3000):
    """Convert a mark image into a single bounding box.

    Every non-zero pixel counts as foreground. Connected components are
    labelled with ``connectivity=2``, components whose area is below
    ``min_area`` are discarded, and the box of the first remaining component
    (in label order) is returned. Any further components are ignored.

    Args:
        mark_path: path of the mark image, opened with PIL.
        min_area: smallest component area, in pixels, that is kept.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` taken from the region's ``bbox``,
        reordered from skimage's (row, col) layout to (x, y).

    Raises:
        ValueError: if no component reaches ``min_area``.
    """
    # Copy the pixels out and close the file handle right away.
    with Image.open(mark_path) as mark_file:
        img = np.array(mark_file)

    mask = img != 0
    if mask.ndim == 3:
        # Multi-channel mark: foreground wherever any channel is non-zero,
        # so labelling still yields 2-D boxes.
        mask = mask.any(axis=-1)

    regions = sk_regions(sk_label(mask, connectivity=2))
    bboxes = [region.bbox for region in regions if region.area >= min_area]
    if not bboxes:
        raise ValueError(
            f"no connected region with area >= {min_area} found in {mark_path}"
        )

    ymin, xmin, ymax, xmax = bboxes[0]

    return xmin, ymin, xmax, ymax

In [None]:
def plot_bbox_and_mark(df):
    """Plot normalized image, image with bounding box, and mark for every category.

    The first row of each category in ``df`` is drawn on a 4 x 2 grid of
    panels. Rows whose category is "normal" get an all-zero mark and a
    zero-sized box at the origin.

    Args:
        df: DataFrame with "category", "ImagePath" and "MarkPath" columns.
            Paths are joined onto ``<config.root>/train``.
    """
    first_rows = df[~df["category"].duplicated()]
    train_dir = os.path.join(config.root, "train")

    rows, cols = 4, 2
    fig = plt.figure(figsize=(16, 16))
    grid = plt.GridSpec(rows, cols)

    # Draw at most one panel per grid cell, and never index past the
    # categories that are actually present in df.
    for i in range(min(rows * cols, len(first_rows))):
        row = first_rows.iloc[i]
        category = row["category"]

        path = os.path.join(train_dir, row["ImagePath"])
        mark_path = os.path.join(train_dir, row["MarkPath"])

        _, _, after = X_ray_normalization(path, vmin=0, vmax=2.5)

        if category != "normal":
            # Context manager closes the file once the pixels are copied out.
            with Image.open(mark_path) as mark_file:
                mark = np.array(mark_file)
            xmin, ymin, xmax, ymax = mask_to_bbox(mark_path)
        else:
            mark = np.zeros((after.shape[0], after.shape[1]))
            xmin, ymin, xmax, ymax = 0, 0, 0, 0

        bbox = patches.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, linewidth=2,
                                 edgecolor="r", facecolor='none')

        # Invisible outer axes whose only job is to carry the category title.
        panel = fig.add_subplot(grid[i])
        panel.set_title(f"{category}\n", fontweight='semibold', size=14)
        panel.set_axis_off()

        gs = gridspec.GridSpecFromSubplotSpec(1, 3, subplot_spec=grid[i])

        ax = fig.add_subplot(gs[0])
        ax.imshow(after, cmap="gray")
        ax.set_title("Image")
        ax.axis("off")

        ax = fig.add_subplot(gs[1], sharey=ax)
        ax.imshow(after, cmap="gray")
        ax.add_patch(bbox)
        ax.set_title("Image with bbox")
        ax.axis("off")

        ax = fig.add_subplot(gs[2], sharey=ax)
        ax.imshow(mark, cmap="gray")
        ax.set_title("Mark")
        ax.axis("off")

    fig.patch.set_facecolor('white')
    fig.suptitle("Images with bbox and marks of 8 categories\n", fontweight='bold', size=16)
    fig.tight_layout()

plot_bbox_and_mark(train_df)

接著將轉換出的 bounding box 寫入 training dataframe 中：

In [None]:
def write_bbox(df):
    """Add "xmin", "ymin", "xmax" and "ymax" columns to ``df`` in place.

    For every row whose category is not "normal", the box comes from
    ``mask_to_bbox`` applied to ``<config.root>/train/<MarkPath>``. Rows with
    category "normal" get the all-zero box ``(0, 0, 0, 0)``.

    Columns are addressed by name, so the result does not depend on the
    column order of ``df``.

    Args:
        df: DataFrame with "category" and "MarkPath" columns; modified in place.
    """
    boxes = []
    for category, mark_rel_path in zip(df["category"], df["MarkPath"]):
        if category != "normal":
            mark_path = os.path.join(config.root, "train", mark_rel_path)
            boxes.append(mask_to_bbox(mark_path))
        else:
            # "normal" images carry no finding, hence the all-zero box.
            boxes.append((0, 0, 0, 0))

    for position, column in enumerate(("xmin", "ymin", "xmax", "ymax")):
        df[column] = [box[position] for box in boxes]

write_bbox(train_df)
train_df

Unnamed: 0,ID,category,Width,Height,Filename,ImagePath,MarkPath,xmin,ymin,xmax,ymax
0,TDR04_20180315_075734,normal,2328,2344,220_97.dcm,normal/image/220_97.dcm,normal/mark/220_97.dcm.jpg,0,0,0,0
1,TDR04_20180315_080518,normal,2472,2560,220_94.dcm,normal/image/220_94.dcm,normal/mark/220_94.dcm.jpg,0,0,0,0
2,TDR04_20180315_081322,normal,2312,2496,220_93.dcm,normal/image/220_93.dcm,normal/mark/220_93.dcm.jpg,0,0,0,0
3,TDR04_20180315_081746,normal,2448,2584,220_92.dcm,normal/image/220_92.dcm,normal/mark/220_92.dcm.jpg,0,0,0,0
4,TDR04_20180315_082113,normal,2144,2384,220_91.dcm,normal/image/220_91.dcm,normal/mark/220_91.dcm.jpg,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
446,TDR02_20161209_161439,scoliosis,2376,2592,4440_5.dcm,scoliosis/image/4440_5.dcm,scoliosis/mark/4440_5.dcm.jpg,1016,560,1432,2328
447,TDR04_20180224_084933,scoliosis,2248,2600,4440_0.dcm,scoliosis/image/4440_0.dcm,scoliosis/mark/4440_0.dcm.jpg,912,552,1408,2272
448,TDR04_20180226_082354,scoliosis,2488,2456,4440.dcm,scoliosis/image/4440.dcm,scoliosis/mark/4440.dcm.jpg,1016,464,1560,2184
449,TDR01_20171106_095308,scoliosis,2320,2376,A0_29.dcm,scoliosis/image/A0_29.dcm,scoliosis/mark/A0_29.dcm.jpg,1032,512,1384,2016


---

# Write class id

因之後轉換資料格式所需，我們需要把疾病類別改寫為 `class_id`，也就是 0 ~ 7 的數字。

In [None]:
# Class ids follow the order in which the categories first appear in train_df.
labels = list(train_df["category"].unique())
label2class = dict(zip(labels, range(len(labels))))
label2class

{'normal': 0,
 'aortic_curvature': 1,
 'aortic_atherosclerosis_calcification': 2,
 'cardiac_hypertrophy': 3,
 'intercostal_pleural_thickening': 4,
 'lung_field_infiltration': 5,
 'degenerative_joint_disease_of_the_thoracic_spine': 6,
 'scoliosis': 7}

In [None]:
# write class_id
def write_class_id(df, mapping=None):
    """Add a "class_id" column to ``df`` in place, derived from its "category" column.

    Args:
        df: DataFrame with a "category" column; modified in place.
        mapping: dict from category name to integer class id. When omitted,
            the module-level ``label2class`` is used.

    Raises:
        KeyError: if a category in ``df`` is missing from the mapping.
    """
    if mapping is None:
        mapping = label2class
    # The column is looked up by name, so the column order of df does not matter.
    df["class_id"] = [mapping[category] for category in df["category"]]

write_class_id(train_df)
train_df

Unnamed: 0,ID,category,Width,Height,Filename,ImagePath,MarkPath,xmin,ymin,xmax,ymax,class_id
0,TDR04_20180315_075734,normal,2328,2344,220_97.dcm,normal/image/220_97.dcm,normal/mark/220_97.dcm.jpg,0,0,0,0,0
1,TDR04_20180315_080518,normal,2472,2560,220_94.dcm,normal/image/220_94.dcm,normal/mark/220_94.dcm.jpg,0,0,0,0,0
2,TDR04_20180315_081322,normal,2312,2496,220_93.dcm,normal/image/220_93.dcm,normal/mark/220_93.dcm.jpg,0,0,0,0,0
3,TDR04_20180315_081746,normal,2448,2584,220_92.dcm,normal/image/220_92.dcm,normal/mark/220_92.dcm.jpg,0,0,0,0,0
4,TDR04_20180315_082113,normal,2144,2384,220_91.dcm,normal/image/220_91.dcm,normal/mark/220_91.dcm.jpg,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
446,TDR02_20161209_161439,scoliosis,2376,2592,4440_5.dcm,scoliosis/image/4440_5.dcm,scoliosis/mark/4440_5.dcm.jpg,1016,560,1432,2328,7
447,TDR04_20180224_084933,scoliosis,2248,2600,4440_0.dcm,scoliosis/image/4440_0.dcm,scoliosis/mark/4440_0.dcm.jpg,912,552,1408,2272,7
448,TDR04_20180226_082354,scoliosis,2488,2456,4440.dcm,scoliosis/image/4440.dcm,scoliosis/mark/4440.dcm.jpg,1016,464,1560,2184,7
449,TDR01_20171106_095308,scoliosis,2320,2376,A0_29.dcm,scoliosis/image/A0_29.dcm,scoliosis/mark/A0_29.dcm.jpg,1032,512,1384,2016,7


---

# Split training set and validation set

這裡要注意的是，由於一張影像可能同時包含多種疾病 ( multi-label ) ，所以在切分 training set 跟 validation set 時不能用一般的 `train_test_split` ，否則會導致兩邊的類別分布不平衡。此外，由於一名病人在 dataframe 中可能有多筆資料，所以切分時要以病人 ID 為單位，避免同一名病人同時出現在 training set 與 validation set！

首先把疾病類別改寫為 one-hot encoding 形式：

In [None]:
train_df.nunique()['ID'], train_df.shape[0]

(348, 451)

In [None]:
binarizer = MultiLabelBinarizer()

# One list of class ids per patient, in order of first appearance of each ID.
disease_id = [
    list(train_df.loc[train_df["ID"] == patient_id, "class_id"])
    for patient_id in train_df.ID.unique()
]

# Multi-hot matrix: one row per patient, one column per class id.
one_hot = binarizer.fit_transform(disease_id)
one_hot

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [None]:
one_hot.shape

(348, 8)

In [None]:
# Split at patient level: the unique IDs (as an (n, 1) column) are stratified on
# their multi-hot labels, then every dataframe row follows its patient's side.
patient_ids = np.asarray(train_df["ID"].unique())[:, np.newaxis]
train_ID, train_label, val_ID, val_label = iterative_train_test_split(
    patient_ids, one_hot, test_size=0.2
)

in_training = train_df["ID"].isin(train_ID.ravel())
in_validation = train_df["ID"].isin(val_ID.ravel())
training = train_df[in_training]
validation = train_df[in_validation]

---
# Dataset to COCO format
創建 `images`、`annotations` 和 `categories` 這三部分

In [None]:
# COCO category entries for every class except "normal".
categories = [
    {"id": class_id, "name": name}
    for name, class_id in label2class.items()
    if name != "normal"
]

categories

[{'id': 1, 'name': 'aortic_curvature'},
 {'id': 2, 'name': 'aortic_atherosclerosis_calcification'},
 {'id': 3, 'name': 'cardiac_hypertrophy'},
 {'id': 4, 'name': 'intercostal_pleural_thickening'},
 {'id': 5, 'name': 'lung_field_infiltration'},
 {'id': 6, 'name': 'degenerative_joint_disease_of_the_thoracic_spine'},
 {'id': 7, 'name': 'scoliosis'}]

In [None]:
# change data to coco format
def coco_format(df, categories):
    """Convert the annotation dataframe into a COCO-style dictionary.

    One image entry is emitted per unique "ID" (ids are 0-based, in order of
    first appearance). Its file name, height and width come from the first
    row of that ID. One annotation is emitted per row whose box is not the
    all-zero placeholder ``(0, 0, 0, 0)``.

    Annotation ids start at 1. pycocotools' COCOeval stores the matched
    ground-truth id and treats 0 as "no match", so an annotation with id 0
    would be scored as unmatched.

    Args:
        df: DataFrame with "ID", "category", "Filename", "Height", "Width",
            "xmin", "ymin", "xmax", "ymax" and "class_id" columns.
        categories: list of ``{"id": ..., "name": ...}`` dicts, stored
            unchanged under "categories".

    Returns:
        dict with "images", "categories" and "annotations" keys; all numeric
        values are plain Python ints so the result is JSON-serializable.
    """
    coco_output = {
        "images": [],
        "categories": categories,
        "annotations": [],
    }

    annotation_id = 1
    for image_id, patient_id in enumerate(df["ID"].unique()):
        image_df = df[df["ID"] == patient_id]

        # All rows of one ID describe the same image, so the first row
        # supplies the image metadata whether there is one row or several.
        first = image_df.iloc[0]
        coco_output["images"].append({
            "file_name": first["category"] + "/" + first["Filename"].replace(".dcm", ".jpg"),
            "height": int(first["Height"]),
            "width": int(first["Width"]),
            "id": image_id,
        })

        for _, row in image_df.iterrows():
            xmin = int(row["xmin"])
            ymin = int(row["ymin"])
            xmax = int(row["xmax"])
            ymax = int(row["ymax"])
            # The all-zero box is the placeholder for "no annotation".
            if xmin == ymin == xmax == ymax == 0:
                continue

            box_width = xmax - xmin
            box_height = ymax - ymin

            coco_output["annotations"].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": int(row["class_id"]),
                "bbox": [xmin, ymin, box_width, box_height],
                "area": box_width * box_height,
                "iscrowd": 0,
                # Rectangle corners as a flat polygon: top-left, top-right,
                # bottom-right, bottom-left.
                "segmentation": [[xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]],
            })
            annotation_id += 1

    return coco_output

In [None]:
train_coco = coco_format(training, categories)
val_coco = coco_format(validation, categories)

---

# Save files

這裡將經過 normalization 處理後的影像存為 .jpg 檔，並將轉換為 COCO format 後的資料存為 .json 檔，方便之後使用：

In [None]:
def dcm_to_jpg(df, out_root="/kaggle/working/"):
    """Normalize every training DICOM listed in ``df`` and save it as a grayscale JPG.

    For an ImagePath such as ``<category>/image/<name>.dcm`` the result is
    written to ``<out_root>/<category>/<name>.jpg``.

    Args:
        df: DataFrame with an "ImagePath" column, relative to
            ``<config.root>/train``.
        out_root: directory that receives the per-category folders.
    """
    for path in df.ImagePath:
        dcm_path = os.path.join(config.root, "train", path)
        _, _, image = X_ray_normalization(dcm_path, vmin=0, vmax=2.5)

        parts = path.split("/")
        out_dir = os.path.join(out_root, parts[0])
        jpg_name = parts[-1].replace(".dcm", ".jpg")

        os.makedirs(out_dir, exist_ok=True)

        # Fix the colour limits to the normalized range. Without them imsave
        # rescales each image to its own min/max, which would undo the
        # shared [0, 1] normalization.
        plt.imsave(os.path.join(out_dir, jpg_name), image, cmap="gray", vmin=0, vmax=1)

In [None]:
dcm_to_jpg(train_df)

In [None]:
# Write both COCO dictionaries as JSON files in the current working directory.
for json_name, coco_dict in (("train.json", train_coco), ("val.json", val_coco)):
    with open(json_name, "w") as outfile:
        json.dump(coco_dict, outfile)

In [None]:
import shutil

# 定義本地與雲端的路徑
local_root = "/kaggle/working/"
cloud_root = "/content/drive/MyDrive/processed_images/"

# 將本地的 JPG 圖片儲存到雲端
def upload_to_cloud(local_root, cloud_root):
    """Copy every ``.jpg`` below ``local_root`` to the same relative path under ``cloud_root``.

    Missing destination folders are created. One "Uploaded ..." line is
    printed per copied file.
    """
    for folder, _subdirs, names in os.walk(local_root):
        for name in names:
            # Only JPG files are transferred.
            if not name.endswith(".jpg"):
                continue

            source = os.path.join(folder, name)

            # Mirror the path relative to local_root underneath cloud_root.
            target = os.path.join(cloud_root, os.path.relpath(source, local_root))
            target_dir = os.path.dirname(target)

            # Make sure the destination folder exists before copying.
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

            shutil.copy(source, target)
            print(f"Uploaded {name} to {target}")

# 執行上傳
upload_to_cloud(local_root, cloud_root)

## **Preparing for testing dataset**
將測試集的 .dcm 影像經過相同的 normalization 後轉成 .jpg

In [None]:
# testing dataframe, read from the dataset root configured in `config`
test_csv_path = os.path.join(config.root, "test.csv")
test_df = pd.read_csv(test_csv_path)
test_df

Unnamed: 0,ID,Width,Height,Filename,ImagePath
0,TDR02_20161123_145314,2328,2360,001.dcm,/image/001.dcm
1,TDR01_20171106_111727,2328,2424,002.dcm,/image/002.dcm
2,TDR01_20180510_090210,2296,2432,003.dcm,/image/003.dcm
3,TDR01_20180511_092549,2392,2576,004.dcm,/image/004.dcm
4,TDR04_20180316_084316,2328,2352,005.dcm,/image/005.dcm
...,...,...,...,...,...
108,TDR04_20180223_093206,2048,2480,109.dcm,/image/109.dcm
109,TDR02_20161125_122319,2096,2296,110.dcm,/image/110.dcm
110,TDR02_20180123_115426,2400,2544,111.dcm,/image/111.dcm
111,TDR01_20180508_173616,1536,1824,112.dcm,/image/112.dcm


In [None]:
def dcm_to_jpg_test(df, out_root="/kaggle/workingTest/"):
    """Normalize every test DICOM listed in ``df`` and save it as a grayscale JPG.

    ImagePath values in the test CSV start with "/" (e.g. ``/image/001.dcm``).
    The leading slash is stripped before joining onto ``<config.root>/test``,
    and each result is written directly into ``out_root`` as ``<name>.jpg``.

    Args:
        df: DataFrame with an "ImagePath" column.
        out_root: directory that receives the JPG files.
    """
    os.makedirs(out_root, exist_ok=True)

    for path in df.ImagePath:
        # lstrip removes the slash only when it is present. Slicing with
        # path[1:] would drop a real character from a path without one.
        dcm_path = os.path.join(config.root, "test", path.lstrip("/"))
        print(dcm_path)
        _, _, image = X_ray_normalization(dcm_path, vmin=0, vmax=2.5)

        jpg_name = path.split("/")[-1].replace(".dcm", ".jpg")

        # Fix the colour limits to the normalized range. Without them imsave
        # rescales each image to its own min/max, which would undo the
        # shared [0, 1] normalization.
        plt.imsave(os.path.join(out_root, jpg_name), image, cmap="gray", vmin=0, vmax=1)

In [None]:
dcm_to_jpg_test(test_df)

/content/drive/MyDrive/hwk05_data/test/image/001.dcm
/content/drive/MyDrive/hwk05_data/test/image/002.dcm
/content/drive/MyDrive/hwk05_data/test/image/003.dcm
/content/drive/MyDrive/hwk05_data/test/image/004.dcm
/content/drive/MyDrive/hwk05_data/test/image/005.dcm
/content/drive/MyDrive/hwk05_data/test/image/006.dcm
/content/drive/MyDrive/hwk05_data/test/image/007.dcm
/content/drive/MyDrive/hwk05_data/test/image/008.dcm
/content/drive/MyDrive/hwk05_data/test/image/009.dcm
/content/drive/MyDrive/hwk05_data/test/image/010.dcm
/content/drive/MyDrive/hwk05_data/test/image/011.dcm
/content/drive/MyDrive/hwk05_data/test/image/012.dcm
/content/drive/MyDrive/hwk05_data/test/image/013.dcm
/content/drive/MyDrive/hwk05_data/test/image/014.dcm
/content/drive/MyDrive/hwk05_data/test/image/015.dcm
/content/drive/MyDrive/hwk05_data/test/image/016.dcm
/content/drive/MyDrive/hwk05_data/test/image/017.dcm
/content/drive/MyDrive/hwk05_data/test/image/018.dcm
/content/drive/MyDrive/hwk05_data/test/image/0

In [None]:
import shutil

# 定義本地與雲端的路徑
local_root = "/kaggle/workingTest/"
cloud_root = "/content/drive/MyDrive/processed_images/test"

# 將本地的 JPG 圖片儲存到雲端
def upload_to_cloud(local_root, cloud_root):
    """Copy every ``.jpg`` below ``local_root`` to the same relative path under ``cloud_root``.

    Missing destination folders are created. One "Uploaded ..." line is
    printed per copied file.
    """
    for folder, _subdirs, names in os.walk(local_root):
        for name in names:
            # Only JPG files are transferred.
            if not name.endswith(".jpg"):
                continue

            source = os.path.join(folder, name)

            # Mirror the path relative to local_root underneath cloud_root.
            target = os.path.join(cloud_root, os.path.relpath(source, local_root))
            target_dir = os.path.dirname(target)

            # Make sure the destination folder exists before copying.
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)

            shutil.copy(source, target)
            print(f"Uploaded {name} to {target}")

# 執行上傳
upload_to_cloud(local_root, cloud_root)