In [None]:
# 检查每个类别中的图像数量,并显示几个示例
from PIL import Image
import matplotlib.pyplot as plt
import os

# Directories holding one image class each.
bleached_path = 'images/bleached_corals'
healthy_path = 'images/healthy_corals'

# List the files of both classes. os.listdir() returns entries in arbitrary,
# platform-dependent order, so sort them: the preview below and any seeded
# split built on these lists then give the same result on every machine.
bleached_images = sorted(os.listdir(bleached_path))
healthy_images = sorted(os.listdir(healthy_path))

# Preview the first few images of each class (row 0: bleached, row 1: healthy).
num_samples_to_display = 3
# squeeze=False keeps `axes` two-dimensional even with a single column, so the
# axes[row, col] indexing below works for any num_samples_to_display >= 1.
fig, axes = plt.subplots(2, num_samples_to_display, figsize=(15, 7), squeeze=False)

# Hide every axis up front so a panel stays blank (instead of showing empty
# ticks) when a class has fewer images than requested.
for ax in axes.flat:
    ax.axis('off')

preview_rows = [
    (bleached_path, bleached_images, "Bleached"),
    (healthy_path, healthy_images, "Healthy"),
]
for row, (folder, file_names, title) in enumerate(preview_rows):
    for col, img_file in enumerate(file_names[:num_samples_to_display]):
        # imshow() copies the pixel data while the file is still open; the
        # context manager then releases the file handle.
        with Image.open(os.path.join(folder, img_file)) as img:
            axes[row, col].imshow(img)
        axes[row, col].set_title(title)

plt.tight_layout()
plt.show()

# Last expression of the cell: number of files found per class.
len(bleached_images), len(healthy_images)


# 数据预处理：查看图片的大小、格式，进行尺寸调整、归一化等预处理。

In [5]:
import numpy as np
from sklearn.model_selection import train_test_split

# Parameters:
#   IMG_SIZE - target size handed to PIL's resize() for every image.
#   SEED     - random_state used for the train/test split.
IMG_SIZE = (224, 224)
SEED = 42

# Image preprocessing:
#   - resize every image to the target size (IMG_SIZE, 224x224, by default);
#   - normalise pixel values from [0, 255] to [0, 1].
def process_images(file_paths, img_size=None):
    """Load image files into a single normalised array.

    Args:
        file_paths: iterable of paths to image files that PIL can open.
        img_size: optional (width, height) passed to PIL's resize(); when
            omitted, the module-level IMG_SIZE is used.

    Returns:
        A float array with one entry per input path, each of shape
        (height, width, 3) and with values in [0, 1]. An empty input
        yields an empty array.
    """
    target_size = IMG_SIZE if img_size is None else img_size
    data = []
    for file_path in file_paths:
        # The context manager closes the underlying file once the pixels
        # have been read, so large folders do not exhaust file handles.
        with Image.open(file_path) as img:
            # Force three channels: grayscale, palette or RGBA files would
            # otherwise produce arrays of a different shape and the final
            # stacking into one array would fail.
            rgb = img.convert("RGB").resize(target_size)
        data.append(np.array(rgb) / 255.0)
    return np.array(data)

# Build the full path of every file in each class folder.
bleached_full_paths = [os.path.join(bleached_path, name) for name in bleached_images]
healthy_full_paths = [os.path.join(healthy_path, name) for name in healthy_images]

# Load and preprocess both classes.
bleached_data = process_images(bleached_full_paths)
healthy_data = process_images(healthy_full_paths)

# Labels: 0 = bleached, 1 = healthy.
bleached_labels = np.zeros(len(bleached_data))
healthy_labels = np.ones(len(healthy_data))

# Stack both classes into one feature array and one label vector.
X = np.concatenate((bleached_data, healthy_data))
y = np.concatenate((bleached_labels, healthy_labels))

# Hold out 20% of the samples for testing and train on the remaining 80%;
# stratify=y keeps the class ratio the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=SEED,
    stratify=y,
)

X_train.shape, X_test.shape


((738, 224, 224, 3), (185, 224, 224, 3))

# 卷积神经网络 (CNN)。

In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Seed TensorFlow so weight initialisation, dropout masks and batch shuffling
# are repeatable between runs (as far as the backend allows); SEED was
# previously only applied to the train/test split.
tf.random.set_seed(SEED)

# 1. Build the model: three Conv2D + MaxPooling2D stages followed by a dense
#    classifier head. The input shape is taken from the training data so it
#    always matches what the preprocessing step produced.
model = Sequential([
    # First convolutional stage
    Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D(pool_size=(2, 2)),

    # Second convolutional stage
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Third convolutional stage
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Fully connected head
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),  # single sigmoid unit for binary classification
])

# Show the model structure
model.summary()

# 2. Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 3. Train the model
# NOTE(review): the test split doubles as validation data here, so the accuracy
# reported below is not measured on data unseen during model development.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# 4. Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"测试集上的准确率: {test_accuracy:.4f}")

# 5. Save the model
model.save("coral_model.h5")
print("模型已保存为coral_model.h5")


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 111, 111, 32)      0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 54, 54, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 52, 52, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 26, 26, 128)       0

  saving_api.save_model(


模型已保存为coral_model.h5


# 优化！
为了优化代码、提高运行速度和准确率，我们可以采取以下策略：

1. **使用更复杂的模型结构**：增加更多的卷积层和全连接层可以帮助模型捕捉更复杂的特征。
2. **数据增强**：使用数据增强技术可以在训练时生成更多的数据，帮助模型泛化得更好。
3. **正则化**：添加dropout层可以减少过拟合，提高模型的泛化能力。
4. **调整优化器**：继续使用Adam优化器，但显式设置较小的学习率（0.0001）。

此代码使用了数据增强、更复杂的模型结构、正则化和批量标准化，以期望提高模型的准确率。您可以在本地运行此代码并查看性能改进情况。

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Seed TensorFlow so weight initialisation and dropout are repeatable between
# runs (as far as the backend allows).
tf.random.set_seed(SEED)

# 1. Data augmentation
# No `rescale` here: X_train was already divided by 255 during preprocessing.
# Rescaling again would squeeze the training inputs into [0, 1/255] while the
# validation/test inputs stay in [0, 1], so the model would be trained and
# evaluated on differently scaled data.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# seed makes the shuffling and the random transformations reproducible.
train_generator = datagen.flow(X_train, y_train, batch_size=32, seed=SEED)

# 2. Build the model: each convolutional stage is Conv2D -> BatchNormalization
#    -> MaxPooling2D -> Dropout. The input shape is taken from the training
#    data so it always matches what the preprocessing step produced.
model = Sequential([
    # First convolutional stage
    Conv2D(32, (3, 3), activation='relu', input_shape=X_train.shape[1:]),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Second convolutional stage
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Third convolutional stage
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Fully connected head
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),  # single sigmoid unit for binary classification
])

# 3. Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# 4. Train the model on augmented batches; validation uses the unaugmented
#    test arrays.
history = model.fit(train_generator,
                    epochs=25,
                    validation_data=(X_test, y_test))

# 5. Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"测试集上的准确率: {test_accuracy:.4f}")

# 6. Save the model
model.save("optimized_coral_model.h5")
print("模型已保存为optimized_coral_model.h5")


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25