In [1]:
# Mount Google Drive into the Colab VM at /content/gdrive/ so later cells can
# read the Kaggle token from it and write the dataset and model logs to it.
from google.colab import drive

drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [2]:
# Create the Kaggle CLI config directory; -p keeps this cell re-runnable
# because it does not fail when the directory already exists.
!mkdir -p ~/.kaggle

In [3]:
!cp /content/gdrive/MyDrive/kaggle.json ~/.kaggle/

In [4]:
# Presumably a quick check that the Kaggle CLI is installed and can
# authenticate with the copied token before starting the large download.
!kaggle competitions list

ref                                                                                deadline             category                reward  teamCount  userHasEntered  
---------------------------------------------------------------------------------  -------------------  ---------------  -------------  ---------  --------------  
https://www.kaggle.com/competitions/arc-prize-2024                                 2024-11-10 23:59:00  Featured         1,100,000 Usd       1161           False  
https://www.kaggle.com/competitions/gemma-language-tuning                          2025-01-15 00:59:00  Analytics          150,000 Usd          0           False  
https://www.kaggle.com/competitions/jane-street-real-time-market-data-forecasting  2025-01-13 23:59:00  Featured           120,000 Usd         54           False  
https://www.kaggle.com/competitions/nfl-big-data-bowl-2025                         2025-01-08 23:59:00  Analytics          100,000 Usd          0           False  
https://www.kagg

In [5]:
# Download the Kermany 2018 dataset archive (kermany2018.zip, ~10.8 GB)
# into the current working directory (/content on Colab).
!kaggle datasets download -d paultimothymooney/kermany2018

Dataset URL: https://www.kaggle.com/datasets/paultimothymooney/kermany2018
License(s): CC-BY-NC-SA-4.0
Downloading kermany2018.zip to /content
100% 10.8G/10.8G [01:29<00:00, 103MB/s] 
100% 10.8G/10.8G [01:29<00:00, 130MB/s]


In [None]:
!unzip "/content/kermany2018.zip" -d "/content/gdrive/My Drive"

[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
  inflating: /content/gdrive/My Drive/oct2017/__MACOSX/OCT2017 /train/DME/._DME-4484581-2.jpeg  
  inflating: /content/gdrive/My Drive/oct2017/__MACOSX/OCT2017 /train/DME/._DME-4484581-3.jpeg  
  inflating: /content/gdrive/My Drive/oct2017/__MACOSX/OCT2017 /train/DME/._DME-4484581-4.jpeg  
  inflating: /content/gdrive/My Drive/oct2017/__MACOSX/OCT2017 /train/DME/._DME-4484581-5.jpeg  
  inflating: /content/gdrive/My Drive/oct2017/__MACOSX/OCT2017 /train/DME/._DME-4484581-6.jpeg  
  inflating: /content/gdrive/My Drive/oct2017/__MACOSX/OCT2017 /train/DME/._DME-4484581-7.jpeg  
  inflating: /content/gdrive/My Drive/oct2017/__MACOSX/OCT2017 /train/DME/._DME-4484581-8.jpeg  
  inflating: /content/gdrive/My Drive/oct2017/__MACOSX/OCT2017 /train/DME/._DME-4490546-1.jpeg  
  inflating: /content/gdrive/My Drive/oct2017/__MACOSX/OCT2017 /train/DME/._DME-4490546-10.jpeg  
  inflating: /content/gdrive/My Drive/oct2017/__MACOSX/OCT2017 /train/DME/._DME-4490546-11.

In [None]:
import os

# Glob patterns for the training and test images: one sub-directory per
# class, JPEG files inside each.
# NOTE(review): the unzip output in this notebook shows directories named
# 'oct2017' and 'OCT2017 ' (with a trailing space), not 'OCT' -- confirm the
# data was renamed or moved to 'OCT' before relying on these patterns.
_oct_root = os.path.join('/', 'content', 'gdrive', 'My Drive', 'OCT')
train_folder = os.path.join(_oct_root, 'train', '**', '*.jpeg')
test_folder = os.path.join(_oct_root, 'test', '**', '*.jpeg')

In [None]:
def input_fn(file_pattern, labels,
             image_size=(224, 224),
             shuffle=False,
             batch_size=64,
             num_epochs=None,
             buffer_size=4096,
             prefetch_buffer_size=None):
    """Build a batched ``tf.data.Dataset`` of (image, one-hot label) pairs.

    Each file matched by ``file_pattern`` is read as a 3-channel JPEG,
    converted to float32 and resized to ``image_size``. The class label is
    taken from the name of the file's parent directory and looked up in
    ``labels`` to get its index.

    NOTE: uses ``tf.contrib``, which only exists in TensorFlow 1.x.

    Args:
        file_pattern: Glob pattern passed to ``tf.data.Dataset.list_files``.
        labels: List of class-name strings; a name's position is its index.
        image_size: ``(height, width)`` every image is resized to.
        shuffle: Whether to shuffle the file names.
        batch_size: Number of examples per batch.
        num_epochs: Number of times to repeat the dataset; ``None`` means
            no repeat is applied.
        buffer_size: Shuffle buffer size, in file names.
        prefetch_buffer_size: Forwarded to ``Dataset.prefetch``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, one_hot_labels)`` batches.
    """
    # Lookup table mapping a class-name string to its index in `labels`.
    table = tf.contrib.lookup.index_table_from_tensor(mapping=tf.constant(labels))
    num_classes = len(labels)

    def _map_func(filename):
        # The label is the parent directory name, i.e. the second-to-last
        # path component. `tf.strings.split` takes `sep`; `delimiter` is the
        # keyword of the legacy `tf.string_split` and is rejected here.
        label = tf.strings.split([filename], sep=os.sep).values[-2]

        # Read and decode the image as RGB, then convert it to float32.
        image = tf.image.decode_jpeg(tf.io.read_file(filename), channels=3)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # Resize to the input shape expected by the model (224x224 by default).
        image = tf.image.resize(image, size=image_size)

        # Return the image and its one-hot encoded label.
        return (image, tf.one_hot(table.lookup(label), num_classes))

    # Dataset of matching file names.
    dataset = tf.data.Dataset.list_files(file_pattern, shuffle=shuffle)

    # Apply shuffling and/or repetition depending on the arguments.
    if num_epochs is not None and shuffle:
        dataset = dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size, num_epochs))
    elif shuffle:
        dataset = dataset.shuffle(buffer_size)
    elif num_epochs is not None:
        dataset = dataset.repeat(num_epochs)

    # Decode and batch in parallel, one call per available CPU.
    dataset = dataset.apply(
        tf.data.experimental.map_and_batch(map_func=_map_func,
                                           batch_size=batch_size,
                                           num_parallel_calls=os.cpu_count()))

    # Prefetch batches so input processing overlaps with training.
    dataset = dataset.prefetch(buffer_size=prefetch_buffer_size)

    return dataset


In [None]:
import tensorflow as tf
import os

# Show INFO-level logs so training progress is visible.
tf.logging.set_verbosity(tf.logging.INFO)

# Class names of the dataset; a name's position is its class index.
labels = ['CNV', 'DME', 'DRUSEN', 'NORMAL']

# Load VGG16 without its fully connected top layers.
keras_vgg16 = tf.keras.applications.VGG16(input_shape=(224, 224, 3),
                                          include_top=False)

# Flatten the VGG16 feature maps.
output = keras_vgg16.output
output = tf.keras.layers.Flatten()(output)

# Add a fully connected softmax layer for multi-class classification.
predictions = tf.keras.layers.Dense(len(labels), activation='softmax')(output)

# Assemble the full model.
model = tf.keras.Model(inputs=keras_vgg16.input, outputs=predictions)

# Freeze every VGG16 layer except the last four, so only those layers and
# the new Dense head are trained.
for layer in keras_vgg16.layers[:-4]:
    layer.trainable = False

# Optimizer.
optimizer = tf.train.AdamOptimizer()

# Compile with categorical cross-entropy, matching the one-hot labels
# produced by input_fn.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# Log the step rate every 10 steps.
est_config = tf.estimator.RunConfig(log_step_count_steps=10)

# Convert the Keras model into an Estimator; checkpoints and logs go to Drive.
estimator = tf.keras.estimator.model_to_estimator(model, model_dir='/content/gdrive/My Drive/estlogs', config=est_config)

BATCH_SIZE = 32  # batch size
EPOCHS = 2  # number of training epochs

# Train the model on the training split. The original passed test_folder
# here, which trains on the held-out test images and leaves train_folder
# unused.
estimator.train(input_fn=lambda: input_fn(train_folder,
                                         labels,
                                         shuffle=True,
                                         batch_size=BATCH_SIZE,
                                         buffer_size=2048,
                                         num_epochs=EPOCHS,
                                         prefetch_buffer_size=4))
