In [None]:
import os
import random
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import numpy as np
from matplotlib.ticker import MaxNLocator
from matplotlib.image import imread
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.applications import EfficientNetB0

In [None]:
# Data visualization
def plot_random_images(dataset_path, num_images=5):
    """Display randomly picked images from a class-per-folder dataset in one row.

    For each of ``num_images`` slots a sub-directory of ``dataset_path`` is
    chosen at random, then a random ``.png``/``.jpg``/``.jpeg`` file inside it
    is read and drawn with the folder name as its title.

    Args:
        dataset_path: Directory whose sub-directories are the classes.
        num_images: Number of slots (columns) in the figure.

    Returns:
        None. Prints a message and returns early when ``dataset_path`` has no
        sub-directories; a slot whose chosen class has no matching image
        files is reported and left empty.
    """
    class_dirs = []
    for entry in os.listdir(dataset_path):
        if os.path.isdir(os.path.join(dataset_path, entry)):
            class_dirs.append(entry)

    if len(class_dirs) == 0:
        print(f"No subdirectories found in {dataset_path}")
        return

    image_suffixes = ('.png', '.jpg', '.jpeg')
    plt.figure(figsize=(15, 10))

    for slot in range(1, num_images + 1):
        label = random.choice(class_dirs)
        label_dir = os.path.join(dataset_path, label)

        # Case-insensitive match on the file extension.
        candidates = [name for name in os.listdir(label_dir)
                      if name.lower().endswith(image_suffixes)]

        if candidates:
            picked = random.choice(candidates)
            pixels = imread(os.path.join(label_dir, picked))

            plt.subplot(1, num_images, slot)
            plt.imshow(pixels)
            plt.title(label)
            plt.axis('off')
        else:
            print(f"No images found in {label}")

    plt.tight_layout()
    plt.show()
# Preview five random images from the training split.
train_path = '/kaggle/input/fruit-classification-dataset/Fruit_dataset/train1'
plot_random_images(train_path, 5)

In [None]:
# Preview five random images from the validation split.
val_path = '/kaggle/input/fruit-classification-dataset/Fruit_dataset/val1'
plot_random_images(val_path, 5)

In [None]:
train_path = "/kaggle/input/fruit-classification-dataset/Fruit_dataset/train1"
val_path = "/kaggle/input/fruit-classification-dataset/Fruit_dataset/val1"

BATCH_SIZE = 32
IMG_SIZE = (224, 224)

# Loader options shared by both splits (one-hot labels, fixed seed).
loader_options = dict(
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    label_mode='categorical',
    seed=42,
)

# Build the training set (shuffled)
train_dataset = image_dataset_from_directory(
    train_path,
    shuffle=True,
    **loader_options
)

# Build the validation set (kept in directory order)
val_dataset = image_dataset_from_directory(
    val_path,
    shuffle=False,
    **loader_options
)

# class_names must be read before prefetch(): the wrapped dataset returned by
# prefetch() is a different object and is not given this attribute here.
class_names = train_dataset.class_names

# One-hot label positions follow each split's own folder list, so differing
# folder sets would silently misalign validation labels. Fail loudly instead.
if val_dataset.class_names != class_names:
    raise ValueError(
        "Training and validation directories contain different class folders: "
        f"{sorted(set(class_names) ^ set(val_dataset.class_names))}"
    )

AUTOTUNE = tf.data.AUTOTUNE
train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
val_dataset = val_dataset.prefetch(buffer_size=AUTOTUNE)


# Dataset summary. cardinality() returns a scalar tensor; .numpy() makes the
# f-string print the plain number instead of the tensor repr.
print(f"Number of classes: {len(class_names)}")
print(f"Class names: {class_names}")
print(f"Training batches: {tf.data.experimental.cardinality(train_dataset).numpy()}")
print(f"Validation batches: {tf.data.experimental.cardinality(val_dataset).numpy()}")

In [None]:
# Fully connected baseline network
# Input shape and output width are derived from IMG_SIZE / class_names so the
# model always matches the one-hot labels the datasets produce.
inputs = layers.Input(shape=(*IMG_SIZE, 3))

# Multiply pixel values by 1/255
x = layers.Rescaling(1./255)(inputs)

# Dense stack: (units, dropout rate) per hidden layer
x = layers.Flatten()(x)
for units, drop_rate in ((512, 0.5), (256, 0.3)):
    x = layers.Dense(units, activation='relu')(x)
    x = layers.Dropout(drop_rate)(x)
outputs = layers.Dense(len(class_names), activation='softmax')(x)

model = models.Model(inputs=inputs, outputs=outputs)

In [None]:
# Compile settings for the fully connected baseline
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Callbacks
# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

# Multiply the learning rate by 0.2 after 3 epochs without val_loss
# improvement, with 1e-7 as the lower bound.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-7
)

# Train. The result gets its own name: the CNN training cell assigns
# `history`, which previously overwrote (and lost) the baseline's curves.
history_fcn = model.fit(
    train_dataset,
    epochs=100,
    validation_data=val_dataset,
    callbacks=[early_stopping, reduce_lr]
)

In [None]:
# Convolutional neural network (no augmentation)
# Input shape and output width are derived from IMG_SIZE / class_names so the
# model always matches the one-hot labels the datasets produce.
inputs = layers.Input(shape=(*IMG_SIZE, 3))

# Multiply pixel values by 1/255
x = layers.Rescaling(1./255)(inputs)

# Convolutional base: four Conv -> BatchNorm -> MaxPool stages
for filters in (32, 64, 128, 256):
    x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)

# Classification head: (units, dropout rate) per hidden layer
x = layers.Flatten()(x)
for units, drop_rate in ((512, 0.5), (256, 0.3)):
    x = layers.Dense(units, activation='relu')(x)
    x = layers.Dropout(drop_rate)(x)
outputs = layers.Dense(len(class_names), activation='softmax')(x)

model_1 = models.Model(inputs=inputs, outputs=outputs)

In [None]:
# Callbacks for the plain CNN
# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early_stopping = callbacks.EarlyStopping(
    restore_best_weights=True, patience=5, monitor='val_loss'
)

# Multiply the learning rate by 0.2 after 3 epochs without val_loss
# improvement, with 1e-7 as the lower bound.
reduce_lr = callbacks.ReduceLROnPlateau(
    min_lr=1e-7, patience=3, factor=0.2, monitor='val_loss'
)

# Compile settings
model_1.compile(
    metrics=['accuracy'],
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=0.0001),
)

# Train
history = model_1.fit(
    train_dataset,
    validation_data=val_dataset,
    callbacks=[early_stopping, reduce_lr],
    epochs=100,
)

In [None]:
# Data augmentation pipeline
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
])

# Input shape and output width are derived from IMG_SIZE / class_names so the
# model always matches the one-hot labels the datasets produce.
inputs = layers.Input(shape=(*IMG_SIZE, 3))

# Apply augmentation
x = data_augmentation(inputs)

# Multiply pixel values by 1/255
x = layers.Rescaling(1./255)(x)

# Convolutional base: four Conv -> BatchNorm -> MaxPool stages
for filters in (32, 64, 128, 256):
    x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)

# Classification head: (units, dropout rate) per hidden layer
x = layers.Flatten()(x)
for units, drop_rate in ((512, 0.5), (256, 0.3)):
    x = layers.Dense(units, activation='relu')(x)
    x = layers.Dropout(drop_rate)(x)
outputs = layers.Dense(len(class_names), activation='softmax')(x)

model_2 = models.Model(inputs=inputs, outputs=outputs)

In [None]:
# Callbacks for the augmented CNN
# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early_stopping = callbacks.EarlyStopping(
    restore_best_weights=True, patience=5, monitor='val_loss'
)

# Multiply the learning rate by 0.2 after 3 epochs without val_loss
# improvement, with 1e-7 as the lower bound.
reduce_lr = callbacks.ReduceLROnPlateau(
    min_lr=1e-7, patience=3, factor=0.2, monitor='val_loss'
)

# Compile settings
model_2.compile(
    metrics=['accuracy'],
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=0.0001),
)

# Train
history_2 = model_2.fit(
    train_dataset,
    validation_data=val_dataset,
    callbacks=[early_stopping, reduce_lr],
    epochs=100,
)

In [None]:
# Data augmentation pipeline
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.15),
    layers.RandomZoom(0.15),
    layers.RandomContrast(0.1),
])

# Load the pretrained backbone (ImageNet weights, without its classifier head).
# The input shape comes from IMG_SIZE so it always matches the datasets.
base_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(*IMG_SIZE, 3)
)

# Freeze the backbone
base_model.trainable = False

# Build the new top network
inputs = layers.Input(shape=(*IMG_SIZE, 3))

# Apply augmentation
x = data_augmentation(inputs)

# NOTE: per the Keras applications docs, EfficientNet's preprocess_input is a
# pass-through (the model rescales internally and expects pixels in [0, 255]),
# so this call is kept only for API consistency.
x = tf.keras.applications.efficientnet.preprocess_input(x)

# Backbone, called with training=False so it runs in inference mode
x = base_model(x, training=False)

# New classification head: (units, dropout rate) per hidden layer.
# Output width is derived from class_names so it matches the one-hot labels.
x = layers.GlobalAveragePooling2D()(x)
for units, drop_rate in ((512, 0.5), (256, 0.3)):
    x = layers.Dense(units, activation='relu')(x)
    x = layers.Dropout(drop_rate)(x)
outputs = layers.Dense(len(class_names), activation='softmax')(x)

model_3 = models.Model(inputs, outputs)

In [None]:
# Callbacks for the transfer-learning model
# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early_stopping = callbacks.EarlyStopping(
    restore_best_weights=True, patience=5, monitor='val_loss'
)

# Multiply the learning rate by 0.2 after 3 epochs without val_loss
# improvement, with 1e-7 as the lower bound.
reduce_lr = callbacks.ReduceLROnPlateau(
    min_lr=1e-7, patience=3, factor=0.2, monitor='val_loss'
)

# Compile settings
model_3.compile(
    metrics=['accuracy'],
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=0.0001),
)

# Train (the backbone is frozen at this stage, so only the new head learns)
print("Training top layers only")
history_3_ENB0 = model_3.fit(
    train_dataset,
    validation_data=val_dataset,
    callbacks=[early_stopping, reduce_lr],
    epochs=100,
)


In [None]:
# Unfreezing helper for fine-tuning
def unfreeze_model(model, num_layers=30, learning_rate=1e-5):
    """Unfreeze the top of the nested pretrained backbone and recompile ``model``.

    The backbone is the first layer of ``model`` whose class is named
    ``Functional``. Its last ``num_layers`` layers are made trainable, except
    BatchNormalization layers, which stay frozen; every earlier backbone layer
    is frozen. ``model`` is then recompiled so the new flags take effect.

    Args:
        model: Outer Keras model that contains the backbone as a nested layer.
        num_layers: How many layers, counted from the end of the backbone,
            to unfreeze.
        learning_rate: Adam learning rate used for the recompiled model.

    Returns:
        None. If no backbone is found, an error is printed and ``model`` is
        left uncompiled-by-this-call.
    """
    # Freeze BatchNormalization layers that sit directly in the outer model.
    for layer in model.layers:
        if layer.__class__.__name__ == 'BatchNormalization':
            layer.trainable = False

    # Layers are matched by class name, as the augmentation block is a
    # 'Sequential' and must not be mistaken for the backbone.
    base_model = next(
        (layer for layer in model.layers
         if layer.__class__.__name__ == 'Functional'),
        None,
    )

    if base_model is None:
        print("Error: EfficientNetB0 layer not found in the model.")
        return

    # The container itself must be trainable: in tf.keras 2.x a layer whose own
    # `trainable` flag is False reports no trainable weights, even if individual
    # sub-layers were switched back on, so fine-tuning would touch only the head.
    base_model.trainable = True

    # Setting the container flag propagates to every sub-layer, so each
    # backbone layer is now assigned its final state explicitly.
    backbone_layers = list(base_model.layers)
    first_unfrozen = max(len(backbone_layers) - num_layers, 0)
    for position, layer in enumerate(backbone_layers):
        is_batch_norm = layer.__class__.__name__ == 'BatchNormalization'
        layer.trainable = position >= first_unfrozen and not is_batch_norm

    # Recompile so the changed trainable flags are picked up.
    model.compile(
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

# Fine-tuning stage: unfreeze the top of the backbone, then keep training.
print("Fine-Tuning Top Layers of Model_3")
unfreeze_model(model_3)

# Train, reusing the early_stopping / reduce_lr callbacks created in an
# earlier cell.
history_4_fine_ENB0 = model_3.fit(
    train_dataset,
    validation_data=val_dataset,
    callbacks=[early_stopping, reduce_lr],
    epochs=100,
)

In [None]:
# Visualize and compare the training runs
def plot_training_histories(histories, model_names, num_classes=100):
    """Plot accuracy curves, loss curves and a best-metric bar chart per model.

    Args:
        histories: Objects with a ``history`` dict holding the lists
            ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``.
        model_names: Legend/tick label for each entry of ``histories``.
        num_classes: Only used in the figure titles.

    Raises:
        ValueError: If ``histories`` and ``model_names`` differ in length.
    """
    if len(histories) != len(model_names):
        raise ValueError(
            f"Got {len(histories)} histories but {len(model_names)} model names"
        )

    plt.figure(figsize=(18, 12))

    # One colour per model (cycled when there are more models than colours);
    # solid line = training, dashed line = validation.
    palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
    train_style, val_style = '-', '--'

    def draw_curves(position, metric, title, y_label):
        """Draw train/val curves of ``metric`` for every model in one panel."""
        plt.subplot(2, 2, position)
        for i, (run, name) in enumerate(zip(histories, model_names)):
            colour = palette[i % len(palette)]
            train_values = run.history[metric]
            epochs = range(1, len(train_values) + 1)
            plt.plot(epochs, train_values,
                     label=f'{name} (Train)',
                     color=colour, linestyle=train_style, linewidth=2)
            plt.plot(epochs, run.history[f'val_{metric}'],
                     label=f'{name} (Val)',
                     color=colour, linestyle=val_style, linewidth=2)

        plt.title(f'{title} Comparison ({num_classes}-Class Problem)', pad=20, fontsize=14)
        plt.xlabel('Epochs', fontsize=12)
        plt.ylabel(y_label, fontsize=12)
        plt.grid(True, alpha=0.3)
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    # Accuracy panel
    draw_curves(1, 'accuracy', 'Accuracy', 'Accuracy')
    plt.gca().yaxis.set_major_formatter(plt.FormatStrFormatter('%.2f'))

    # Loss panel
    draw_curves(2, 'loss', 'Loss', 'Loss')

    # Summary panel: best validation accuracy and lowest validation loss.
    plt.subplot(2, 2, 4)
    best_val_acc = [max(h.history['val_accuracy']) for h in histories]
    best_val_loss = [min(h.history['val_loss']) for h in histories]

    x = np.arange(len(model_names))
    width = 0.35

    rects1 = plt.bar(x - width/2, best_val_acc, width,
                     label='Max Val Accuracy', color='#2ca02c')
    rects2 = plt.bar(x + width/2, best_val_loss, width,
                     label='Min Val Loss', color='#d62728')

    plt.title('Final Model Comparison', pad=20, fontsize=14)
    plt.xlabel('Models', fontsize=12)
    plt.ylabel('Metric Value', fontsize=12)
    plt.xticks(x, model_names, rotation=45, ha='right')
    plt.grid(True, axis='y', alpha=0.3)

    # Write each bar's value just above it.
    for rect in (*rects1, *rects2):
        height = rect.get_height()
        plt.annotate(f'{height:.3f}',
                     xy=(rect.get_x() + rect.get_width() / 2, height),
                     xytext=(0, 3),  # 3 points vertical offset
                     textcoords="offset points",
                     ha='center', va='bottom', fontsize=9)

    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.tight_layout(pad=3.0)
    plt.suptitle(f'Model Training Comparison for {num_classes}-Class Image Classification',
                 y=1.02, fontsize=16)
    plt.show()

In [None]:
# Pair each recorded training run with its legend label, then draw the comparison.
model_names = ['Base CNN', 'Augmentation CNN', 'EfficientNetB0', 'Fine-Tuned ENB0']
histories = [history, history_2, history_3_ENB0, history_4_fine_ENB0]

plot_training_histories(histories, model_names, 100)

In [None]:
# Qualitative check of the model's predictions
def plot_predictions(model, dataset, class_names, num_images=10, title=None):
    """Plot predictions for the first batch that ``dataset`` produces.

    Note that ``dataset.take(1)`` returns whatever batch comes first; it is
    only a random batch if ``dataset`` itself shuffles.

    Args:
        model: Object with a ``predict(images)`` method returning per-class scores.
        dataset: Batched dataset yielding ``(images, one_hot_labels)``.
        class_names: Class label for each score/label index.
        num_images: Maximum number of images to draw (5 per row).
        title: Figure title; defaults to the original fixed caption.

    Returns:
        None. Prints the accuracy over the whole batch, not just the images drawn.
    """
    if title is None:
        title = "Fine-Tuned ENB0 Predictions on Validation Set\n(Green=Correct, Red=Incorrect)"
    columns = 5

    for images, labels in dataset.take(1):
        # Model predictions, handled as NumPy arrays from here on.
        predictions = np.asarray(model.predict(images))
        predicted_labels = np.argmax(predictions, axis=1)
        true_labels = np.argmax(np.asarray(labels), axis=1)

        # Size the grid from the number of images so more than 10 also fit.
        shown = max(0, min(num_images, len(images)))
        rows = max(1, -(-shown // columns))  # ceiling division

        plt.figure(figsize=(20, 6 * rows))

        for i in range(shown):
            plt.subplot(rows, columns, i + 1)
            plt.imshow(np.asarray(images[i]).astype("uint8"))

            predicted = int(predicted_labels[i])
            actual = int(true_labels[i])
            pred_class = class_names[predicted]
            true_class = class_names[actual]
            confidence = np.max(predictions[i]) * 100

            color = 'green' if predicted == actual else 'red'

            panel_title = f"Pred: {pred_class}\nTrue: {true_class}\nConf: {confidence:.1f}%"
            plt.title(panel_title, color=color, fontsize=10)
            plt.axis('off')

        plt.suptitle(title, fontsize=14, y=1.02)
        plt.tight_layout()
        plt.show()

        # Accuracy over the full batch
        accuracy = np.mean(predicted_labels == true_labels)
        print(f"Batch Accuracy: {accuracy:.2%}")


In [None]:
# Show predictions of the fine-tuned model on ten validation images.
plot_predictions(model_3, val_dataset, class_names, 10)

In [None]:
def plot_predictions(model, dataset, class_names, num_images=10, title=None):
    """Plot predictions for images sampled at random from the whole dataset.

    Images are drawn with single-pass reservoir sampling, so at most
    ``num_images`` examples are held in memory instead of the full dataset.

    Args:
        model: Object with a ``predict(images)`` method returning per-class scores.
        dataset: Batched dataset of ``(images, one_hot_labels)``; it is
            unbatched internally.
        class_names: Class label for each score/label index.
        num_images: Maximum number of images to sample and draw (5 per row).
        title: Figure title; defaults to the original fixed caption.

    Returns:
        None. Prints the accuracy over the sampled images; prints a notice and
        returns early when there is nothing to sample.
    """
    if title is None:
        title = "Fine-Tuned ENB0 Predictions on Validation Set\n(Green=Correct, Red=Incorrect)"
    columns = 5

    # Reservoir sampling (Algorithm R): every example ends up in the sample
    # with equal probability without storing the whole stream.
    sampled = []
    if num_images > 0:
        for seen, example in enumerate(dataset.unbatch()):
            if len(sampled) < num_images:
                sampled.append(example)
            else:
                slot = random.randint(0, seen)
                if slot < num_images:
                    sampled[slot] = example

    if not sampled:
        print("No images available to plot.")
        return

    # The reservoir keeps early items in stream order; shuffle for display.
    random.shuffle(sampled)
    selected_images = np.stack([np.asarray(image) for image, _ in sampled])
    selected_labels = np.stack([np.asarray(label) for _, label in sampled])

    # Model predictions
    predictions = np.asarray(model.predict(selected_images))
    predicted_labels = np.argmax(predictions, axis=1)
    true_labels = np.argmax(selected_labels, axis=1)

    # Size the grid from the number of images so more than 10 also fit.
    rows = -(-len(sampled) // columns)  # ceiling division
    plt.figure(figsize=(20, 6 * rows))

    for i in range(len(sampled)):
        plt.subplot(rows, columns, i + 1)
        plt.imshow(selected_images[i].astype("uint8"))

        predicted = int(predicted_labels[i])
        actual = int(true_labels[i])
        pred_class = class_names[predicted]
        true_class = class_names[actual]
        confidence = np.max(predictions[i]) * 100

        color = 'green' if predicted == actual else 'red'

        panel_title = f"Pred: {pred_class}\nTrue: {true_class}\nConf: {confidence:.1f}%"
        plt.title(panel_title, color=color, fontsize=10)
        plt.axis('off')

    plt.suptitle(title, fontsize=14, y=1.02)
    plt.tight_layout()
    plt.show()

    # Accuracy over the sampled images
    accuracy = np.mean(predicted_labels == true_labels)
    print(f"Sample Accuracy: {accuracy:.2%}")


In [None]:
# Show predictions of the fine-tuned model on ten randomly sampled validation images.
plot_predictions(model_3, val_dataset, class_names, 10)