<a href="https://colab.research.google.com/github/Annie00000/Project/blob/main/1_7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import random
import time
from tensorflow.keras.preprocessing.image import load_img, img_to_array

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import Sequence



In [None]:
# CPU: report the number of CPUs reported by the operating system.
import os

cpu_cores = os.cpu_count()
print("CPU cores:", cpu_cores)


# GPU: list every GPU that TensorFlow can see in this runtime.
import tensorflow as tf

# `tf.config.list_physical_devices` is the stable endpoint; the
# `tf.config.experimental` spelling is a deprecated alias of the same function.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        print(gpu)
else:
    print("No GPU found")

## Data augmentation (自定義數據增強)

#### 不使用 augmentation_dict，固定做隨機旋轉（±10 度）與對比度增強

In [None]:
from scipy.ndimage import rotate

class CustomDataGenerator(Sequence):
    """Keras ``Sequence`` that loads images from disk and yields augmented batches.

    Each image is decoded to 3 channels, resized to ``target_size``, rotated by
    a random angle, contrast-adjusted around its mean and divided by 255.
    Labels are mapped through ``label_to_index`` and one-hot encoded.

    :param image_paths: sequence of image file paths.
    :param labels: sequence of labels, aligned with ``image_paths``.
    :param batch_size: number of samples per batch (the last batch may be smaller).
    :param target_size: ``(height, width)`` passed to ``tf.image.resize``.
    :param label_to_index: mapping from a label to its integer class index.
    :param num_classes: width of the one-hot label vectors.
    :param shuffle: reshuffle the sample order at the end of every epoch.
    :param rotation_range: maximum absolute rotation in degrees; the angle is
        drawn uniformly from ``[-rotation_range, rotation_range]``. ``0``
        disables the rotation (e.g. for a validation generator).
    :param contrast_factor: multiplier applied to the distance of every pixel
        from the image mean; ``1.0`` leaves the contrast unchanged.
    """

    def __init__(self, image_paths, labels, batch_size, target_size, label_to_index,
                 num_classes, shuffle=True, rotation_range=10, contrast_factor=1.5):
        # Let the Keras base class initialise its own state.
        super().__init__()
        self.image_paths = np.array(image_paths)
        self.labels = np.array(labels)
        self.batch_size = batch_size
        self.target_size = target_size
        self.label_to_index = label_to_index
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.rotation_range = rotation_range
        self.contrast_factor = contrast_factor
        # Builds self.indices (and shuffles it when requested).
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.image_paths) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(images, one_hot_labels)``."""
        start = idx * self.batch_size
        batch_indices = self.indices[start:start + self.batch_size]
        batch_images = []
        batch_labels = []

        for i in batch_indices:
            # Load and decode with TensorFlow. expand_animations=False keeps
            # animated GIFs 3-D (first frame only) so resize/rotate see H x W x C.
            raw = tf.io.read_file(self.image_paths[i])
            img = tf.image.decode_image(raw, channels=3, expand_animations=False)
            img = tf.image.resize(img, self.target_size)

            # The augmentation runs in NumPy/SciPy, so convert explicitly,
            # then scale the 0-255 range down to 0-1.
            img = self.apply_augmentation(img.numpy()) / 255.0
            batch_images.append(img)
            batch_labels.append(self.label_to_index[self.labels[i]])

        images = tf.convert_to_tensor(np.asarray(batch_images, dtype=np.float32))
        return images, to_categorical(batch_labels, num_classes=self.num_classes)

    def apply_augmentation(self, image):
        """Rotate ``image`` by a random angle and then adjust its contrast.

        :param image: array-like image; assumed to be H x W x C with values in
            0-255 (that is what ``__getitem__`` passes in).
        :return: the augmented image as a NumPy array clipped to 0-255.
        """
        image = np.asarray(image)

        if self.rotation_range:
            angle = random.uniform(-self.rotation_range, self.rotation_range)
            # reshape=False keeps the output size equal to the input size (the
            # corners may be cropped); mode='nearest' fills pixels that fall
            # outside the source image with the nearest edge value.
            image = rotate(image, angle, reshape=False, mode='nearest')

        return self.adjust_contrast(image, self.contrast_factor)

    def adjust_contrast(self, image, contrast_factor):
        """Scale pixel deviations from the mean by ``contrast_factor``.

        The mean is taken over the first two axes (per channel for an
        H x W x C image) and the result is clipped to the 0-255 range.
        """
        mean = np.mean(image, axis=(0, 1), keepdims=True)
        adjusted = (image - mean) * contrast_factor + mean
        return np.clip(adjusted, 0, 255)

    def on_epoch_end(self):
        """Rebuild the sample order, shuffling it when ``shuffle`` is set."""
        self.indices = np.arange(len(self.image_paths))
        if self.shuffle:
            np.random.shuffle(self.indices)


# Instantiate the data generators: the training generator reshuffles its
# samples every epoch, the validation generator keeps a fixed order.
train_generator = CustomDataGenerator(
    image_paths=train_paths,
    labels=train_labels,
    batch_size=32,
    target_size=(224, 224),
    label_to_index=label_to_index,
    num_classes=len(unique_labels),
    shuffle=True,
)
val_generator = CustomDataGenerator(
    image_paths=val_paths,
    labels=val_labels,
    batch_size=32,
    target_size=(224, 224),
    label_to_index=label_to_index,
    num_classes=len(unique_labels),
    shuffle=False,
)

## Subprograms

* class name

In [None]:
# Derive the class names from the sub-folder names of the training data directory.
data_dir = '/path/to/training/data'
# os.listdir returns entries in arbitrary order, so sort to make the
# index -> label mapping reproducible between runs and machines.
# NOTE(review): this order must match the label_to_index mapping used at
# training time -- confirm before using class_labels to decode predictions.
class_labels = sorted(
    folder_name
    for folder_name in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, folder_name))
)


### 1. train model

In [None]:
def retrained_model():
    """Unfinished placeholder; the notebook left this definition without a body.

    The working implementation in this notebook is ``retrain_model``.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError("retrained_model() is a placeholder; use retrain_model() instead")


In [None]:
import os
import datetime
import tensorflow as tf

def retrain_model(train_data, test_data, model, model_name, model_save_path, log_file_path,
                  val_data=None, callbacks=None, epochs=10):
    """
    Retrain a model, evaluate it, save it and append the result to a log file.

    :param train_data: training data, passed to ``model.fit``.
    :param test_data: test data, passed to ``model.evaluate``.
    :param model: the model to train; must provide ``fit``, ``evaluate`` and ``save``.
    :param model_name: prefix of the saved model's file name.
    :param model_save_path: directory the model file is written to (created if missing).
    :param log_file_path: log file the result line is appended to.
    :param val_data: optional validation data for ``model.fit``.
    :param callbacks: optional list of callbacks for ``model.fit``.
    :param epochs: number of training epochs.
    :return: ``(test_accuracy, model_filename)``.
    """
    # Imported locally because another cell of this notebook rebinds the global
    # name ``datetime`` to the class (``from datetime import datetime``), which
    # would break a module-style ``datetime.datetime.now()`` call.
    from datetime import datetime as _datetime

    # Train. The validation data and callbacks are explicit arguments instead
    # of undefined globals.
    model.fit(train_data, validation_data=val_data, epochs=epochs, callbacks=callbacks)

    # Evaluate.
    # NOTE(review): assumes evaluate() returns exactly (loss, accuracy), i.e. the
    # model was compiled with a single accuracy metric -- confirm.
    _, test_accuracy = model.evaluate(test_data)

    # Save the model under a timestamped name (drop this step if not needed).
    timestamp = _datetime.now().strftime('%Y%m%d_%H%M%S')
    model_filename = f"{model_name}_{timestamp}.h5"
    if model_save_path:
        os.makedirs(model_save_path, exist_ok=True)
    model.save(os.path.join(model_save_path, model_filename))

    # Append to the log ('a' appends, 'w' would overwrite earlier entries).
    with open(log_file_path, 'a', encoding='utf-8') as log_file:
        log_file.write(f"{model_filename}: Test Accuracy = {test_accuracy}\n")

    return test_accuracy, model_filename


In [None]:
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
from datetime import datetime

def retrain_model(data_dir, model_path, model_name, log_file_path, batch_size, target_size, augmentation_dict, label_to_index, num_classes, test_size=0.2, callbacks=None, epochs=10):
    """
    Reload a saved model, retrain it on the images under ``data_dir`` and log the result.

    ``data_dir`` must contain one sub-folder per class; the folder name is used
    as the label of every file inside it. The files are split into a training
    and a validation set (stratified, ``random_state=42``).

    :param data_dir: root directory with one sub-folder per class.
    :param model_path: path of the saved model, passed to ``load_model``.
    :param model_name: prefix of the retrained model's file name.
    :param log_file_path: log file the result line is appended to.
    :param batch_size: batch size for both generators.
    :param target_size: image size for both generators.
    :param augmentation_dict: augmentation settings for the training generator.
    :param label_to_index: mapping from a label to its integer class index.
    :param num_classes: number of classes.
    :param test_size: fraction of the data held out for validation.
    :param callbacks: optional list of Keras callbacks for ``model.fit``
        (for example early stopping, checkpointing, learning-rate reduction).
    :param epochs: number of training epochs.
    :return: ``(test_accuracy, saved_model_name)``; the accuracy is measured on
        the validation split.
    """
    # Local import so this function does not depend on which notebook cell last
    # bound the global name ``datetime`` (module vs. class).
    from datetime import datetime as _datetime

    # Collect every image path and its label. The listings are sorted because
    # os.listdir order is arbitrary, which would make the random_state=42 split
    # differ between runs.
    image_paths = []
    labels = []

    for class_folder in sorted(os.listdir(data_dir)):
        class_folder_path = os.path.join(data_dir, class_folder)
        if not os.path.isdir(class_folder_path):
            # Stray files in data_dir are not classes.
            continue
        for file in sorted(os.listdir(class_folder_path)):
            fpath = os.path.join(class_folder_path, file)
            if os.path.isfile(fpath):
                image_paths.append(fpath)
                labels.append(class_folder)

    # Split into training and validation sets.
    train_paths, val_paths, train_labels, val_labels = train_test_split(image_paths, labels, test_size=test_size, stratify=labels, shuffle=True, random_state=42)

    # Create the data generators.
    # NOTE(review): these calls pass augmentation_dict / apply_clahe, which the
    # CustomDataGenerator defined in this notebook does not accept -- confirm
    # which generator variant this function is meant to be used with.
    train_generator = CustomDataGenerator(
        train_paths, train_labels, batch_size=batch_size, target_size=target_size,
        augmentation_dict=augmentation_dict, label_to_index=label_to_index,
        num_classes=num_classes, shuffle=True, apply_clahe=True
    )
    val_generator = CustomDataGenerator(
        val_paths, val_labels, batch_size=batch_size, target_size=target_size,
        augmentation_dict={}, label_to_index=label_to_index,
        num_classes=num_classes, shuffle=False, apply_clahe=False
    )

    # Load the model.
    model = load_model(model_path)

    # Retrain. The callbacks are an explicit argument instead of the
    # notebook-level names early_stopping / model_checkpoint / reduce_lr.
    model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=epochs,
        callbacks=callbacks
    )

    # Evaluate on the validation split.
    # NOTE(review): assumes evaluate() returns exactly (loss, accuracy) -- confirm.
    _, test_accuracy = model.evaluate(val_generator)

    # Save the model under a timestamped name in the current working directory.
    timestamp = _datetime.now().strftime('%Y%m%d_%H%M%S')
    saved_model_name = f"{model_name}_{timestamp}.h5"
    model.save(saved_model_name)

    # Append the result to the log file.
    with open(log_file_path, 'a', encoding='utf-8') as log_file:
        log_file.write(f"{timestamp}: Test Accuracy = {test_accuracy}\n")

    return test_accuracy, saved_model_name

### 2. predict

* 判断给定路径的文件夹内是否包含其他子文件夹，并据此选择单文件夹或多文件夹的预测函数。

In [None]:
import os

def predict_images_based_on_folder_structure(folder_path, model, target_size, class_labels):
    """
    Predict the images under ``folder_path``, dispatching on the folder layout.

    If ``folder_path`` contains at least one sub-folder, the work is delegated
    to ``predict_multiple_folders``; otherwise to ``predict_single_folder``.

    :param folder_path: folder to inspect.
    :param model: forwarded unchanged as the helpers' ``model_path`` argument.
        NOTE(review): the helpers in this notebook pass it to ``load_model``,
        so it presumably has to be a saved-model path -- confirm.
    :param target_size: target image size. Accepted for interface
        compatibility; not forwarded, because the helpers take only
        ``(folder_path, model_path)``.
    :param class_labels: list of class labels. Accepted for interface
        compatibility; not forwarded for the same reason.
    :return: the helper's prediction result.
    """
    # Check whether any entry of the folder is itself a directory.
    with os.scandir(folder_path) as entries:
        contains_subfolders = any(entry.is_dir() for entry in entries)

    if contains_subfolders:
        # Sub-folders present: predict each of them separately.
        return predict_multiple_folders(folder_path, model)

    # Flat folder: predict its images directly.
    return predict_single_folder(folder_path, model)


* 單個文件夾

In [None]:
def predict_single_folder(folder_path, model_path):
    """
    Predict the class of every ``.png`` image directly inside ``folder_path``.

    :param folder_path: folder containing the images.
    :param model_path: path of a saved model (loaded with ``load_model``), or an
        already-loaded model object, which is used as is so that callers can
        avoid reloading the model for every folder.
    :return: dict mapping each file name to
        ``{'class': <label>, 'probabilities': <list of floats>}``.
    """
    if isinstance(model_path, (str, bytes, os.PathLike)):
        model = load_model(model_path)
    else:
        model = model_path

    predictions = {}
    # Sorted so the result order does not depend on the file system.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith('.png'):
            continue
        img_path = os.path.join(folder_path, filename)
        # NOTE(review): load_and_preprocess_image is not defined in this
        # notebook; it presumably has to reproduce the training-time resize and
        # scaling -- confirm.
        img = load_and_preprocess_image(img_path)
        # The model expects a batch, so add a leading batch axis of size 1.
        pred = model.predict(np.expand_dims(img, axis=0))
        # class_labels is the module-level list of class names.
        predicted_class = class_labels[np.argmax(pred)]
        predictions[filename] = {
            'class': predicted_class,
            'probabilities': pred[0].tolist()
        }
    return predictions

* 一個文件夾下有多個文件夾

In [None]:
def predict_multiple_folders(parent_folder_path, model_path):
    """
    Run ``predict_single_folder`` on every sub-folder of ``parent_folder_path``.

    :param parent_folder_path: folder whose immediate sub-folders are predicted.
        Files located directly inside it are ignored.
    :param model_path: forwarded unchanged to ``predict_single_folder``.
    :return: dict mapping each sub-folder name to that folder's predictions.
    """
    # The model is not loaded here: it was never used by this function, and
    # predict_single_folder handles model_path itself.
    all_predictions = {}
    # Sorted so the result order does not depend on the file system.
    for folder_name in sorted(os.listdir(parent_folder_path)):
        folder_path = os.path.join(parent_folder_path, folder_name)
        if os.path.isdir(folder_path):
            all_predictions[folder_name] = predict_single_folder(folder_path, model_path)
    return all_predictions