In [None]:
import json
import pandas as pd

def load_json_data(file_path):
    """Read a JSON file and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's top-level value.
    """
    # Without an explicit encoding open() uses the platform locale, which can fail
    # (or mis-decode) non-ASCII text in UTF-8 encoded JSON files.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Parse the annotation files of the training and the validation split.
train_annotations = load_json_data(file_path='train_mini_arthropoda.json')
val_annotations = load_json_data(file_path='val_arthropoda.json')

def get_image_paths_and_labels(annotations):
    """Collect the file name and class label of every annotated image.

    Args:
        annotations: Mapping with an 'images' list (entries carrying 'id' and
            'file_name') and an 'annotations' list (entries carrying
            'image_id' and 'category_id').

    Returns:
        A tuple ``(image_paths, labels)`` of two equally long lists, ordered as
        in ``annotations['images']``. Images without any annotation are left
        out so that position i of both lists always describes the same image.
    """
    # Index the category of the FIRST annotation of each image in one pass,
    # instead of rescanning every annotation for every image.
    # Each image is assumed to carry a single class label.
    first_category = {}
    for ann in annotations['annotations']:
        image_id = ann['image_id']
        if image_id not in first_category:
            first_category[image_id] = ann['category_id']

    image_paths = []
    labels = []
    for image_info in annotations['images']:
        image_id = image_info['id']
        if image_id not in first_category:
            # Appending the path without a label would shift every later label
            # onto the wrong image, so unlabeled images are skipped entirely.
            continue
        image_paths.append(f"{image_info['file_name']}")  # adjust to the actual storage layout
        labels.append(first_category[image_id])
    return image_paths, labels

# Extract the (file name, label) lists of both splits.
train_image_paths, train_labels = get_image_paths_and_labels(train_annotations)
val_image_paths, val_labels = get_image_paths_and_labels(val_annotations)

# Tabulate each split with one row per image.
train_data = pd.DataFrame(dict(filename=train_image_paths, label=train_labels))
val_data = pd.DataFrame(dict(filename=val_image_paths, label=val_labels))

# Persist both tables (without the row index) for the following cells.
train_data.to_csv('train_data.csv', index=False)
val_data.to_csv('val_data.csv', index=False)

In [None]:
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np

# Settings shared by the training and the validation generator.
IMAGE_DIR = 'd:/BrainBoner/inatDetection/'  # folder the CSV file names are relative to
IMAGE_SIZE = (224, 224)  # every image is resized to 224x224
BATCH_SIZE = 16

train_data = pd.read_csv('train_data.csv')
val_data = pd.read_csv('val_data.csv')

# The labels are used as class names by the generators, so store them as strings.
train_data['label'] = train_data['label'].astype(str)
val_data['label'] = val_data['label'].astype(str)

unique_labels = np.unique(train_data['label'])
print('唯一标签总数', len(unique_labels))

# One explicit class list for BOTH generators. When `classes` is omitted each
# generator infers its own label -> index mapping from its own dataframe, so a
# validation set with a different label set would be one-hot encoded
# differently from the training set. Sorting the training labels reproduces
# the order Keras infers for the training data on its own.
class_names = sorted(train_data['label'].unique())

# Training images are augmented on the fly.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
# data generator
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory=IMAGE_DIR,
    x_col='filename',  # dataframe column holding the file names
    y_col='label',  # dataframe column holding the labels
    classes=class_names,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'  # one-hot encoded targets
)

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    directory=IMAGE_DIR,
    x_col='filename',
    y_col='label',
    classes=class_names,  # same label -> index mapping as the training generator
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

In [None]:
import warnings

# Label -> class index mapping assigned by the training generator.
train_class_indices = train_generator.class_indices
# Label -> class index mapping assigned by the validation generator.
val_class_indices = val_generator.class_indices

# The model learns the training mapping, so validation targets are only
# meaningful when the validation generator encodes labels the same way.
if val_class_indices != train_class_indices:
    warnings.warn(
        'Validation class indices differ from the training class indices; '
        'validation metrics would be computed against mismatched labels.'
    )

# Save the training mapping to a file (only the training mapping is written).
pd.DataFrame.from_dict(train_class_indices, orient='index').to_csv('class_indices.csv')

In [None]:
import matplotlib.pyplot as plt

# Fetch one batch from the training generator; only the first element (the images) is kept.
sample_training_images, _ = next(train_generator)

# Helper that displays images
def plotImages(images_arr):
    """Draw the given images side by side in a single row of five panels.

    Images are paired with the panels in order and pairing stops at whichever
    runs out first, so at most five images are drawn. The axis decorations of
    every used panel are hidden and the figure is shown right away.
    """
    _, panel_grid = plt.subplots(1, 5, figsize=(20, 20))
    panels = panel_grid.flatten()
    for picture, panel in zip(images_arr, panels):
        panel.imshow(picture)
        panel.axis('off')
    plt.tight_layout()
    plt.show()

# Display the first five images of the sampled batch.
plotImages(sample_training_images[:5])

In [None]:
from keras.applications import EfficientNetV2B0
from keras.layers import Dense, GlobalAveragePooling2D, Dropout
from keras.optimizers import SGD
from keras.models import Model
from keras.regularizers import l2

from keras.layers import Input, Rescaling

# Size the classifier head from the data instead of a hard-coded class count,
# so it always matches the one-hot width produced by the training generator.
num_classes = len(train_generator.class_indices)

# The generators feed pixels already scaled to [0, 1] (rescale=1./255), whereas
# EfficientNetV2 with its built-in preprocessing (the default) expects raw
# [0, 255] pixels and rescales them itself. Map the inputs back to [0, 255] so
# they are not scaled down twice before reaching the pretrained weights.
inputs = Input(shape=(224, 224, 3))
x = Rescaling(255.0)(inputs)

# ImageNet-pretrained backbone without its original classification head.
base_model = EfficientNetV2B0(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
x = base_model(x)

# New head: pooled features -> dropout -> hidden layer -> L2-regularized softmax.
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(num_classes, activation='softmax', kernel_regularizer=l2(0.01))(x)
model = Model(inputs=inputs, outputs=predictions)

In [None]:
from keras import backend

def smooth_labels_loss(label_smoothing=0.1):
    """Build a categorical cross-entropy loss that applies label smoothing.

    Args:
        label_smoothing: Share of the target mass that is spread evenly over
            all classes.

    Returns:
        A function ``loss(y_true, y_pred)`` usable as a Keras loss.
    """
    keep_weight = 1 - label_smoothing

    # The inner function keeps the name `loss`: that is the key under which it
    # is passed to load_model(custom_objects=...) elsewhere in this notebook.
    def loss(y_true, y_pred):
        # The class count is the size of the last axis of the predictions.
        class_count = backend.int_shape(y_pred)[-1]
        uniform_share = label_smoothing / class_count
        softened_targets = y_true * keep_weight + uniform_share
        return backend.categorical_crossentropy(softened_targets, y_pred)

    return loss

In [None]:
# Train with Nesterov-momentum SGD, the label-smoothing cross-entropy
# (smoothing 0.1) and accuracy as the reported metric.
model.compile(
    optimizer=SGD(learning_rate=0.01, momentum=0.9, nesterov=True),
    loss=smooth_labels_loss(0.1),
    metrics=['accuracy'],
)

model.summary()

In [None]:
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Multiply the learning rate by 0.1 (never below 1e-5) when val_loss stops
# improving; patience is 3 epochs.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=3,
    min_lr=1e-5,
    verbose=1,
)
# Keep only the best full model (not just the weights) by val_loss in ckpt.h5.
checkpoint = ModelCheckpoint(
    filepath='ckpt.h5',
    monitor='val_loss',
    save_best_only=True,
    save_weights_only=False,
)

# Initial training run: 10 epochs, each covering every batch of both generators.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=val_generator,
    validation_steps=len(val_generator),
    epochs=10,
    callbacks=[checkpoint, reduce_lr],
    verbose=1,
)

# Store the model as it is after the last epoch.
model.save("full_model.h5")

In [None]:
from keras import backend
from keras.models import load_model
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

def smooth_labels_loss(label_smoothing=0.1):
    """Build a categorical cross-entropy loss that applies label smoothing.

    Args:
        label_smoothing: Share of the target mass that is spread evenly over
            all classes.

    Returns:
        A function ``loss(y_true, y_pred)`` usable as a Keras loss.
    """
    keep_weight = 1 - label_smoothing

    # The inner function keeps the name `loss`: that is the key under which it
    # is passed to load_model(custom_objects=...) in this cell.
    def loss(y_true, y_pred):
        # The class count is the size of the last axis of the predictions.
        class_count = backend.int_shape(y_pred)[-1]
        uniform_share = label_smoothing / class_count
        softened_targets = y_true * keep_weight + uniform_share
        return backend.categorical_crossentropy(softened_targets, y_pred)

    return loss

# Resume from the checkpoint file. The custom loss is registered under the key
# 'loss', which is the name of the inner function returned by smooth_labels_loss.
modelContinue = load_model('ckpt.h5', custom_objects={'loss': smooth_labels_loss(0.1)})

# NOTE(review): this is a fresh ModelCheckpoint instance, so presumably its
# record of the best val_loss starts over and the first resumed epoch will
# overwrite ckpt.h5 - confirm that this is intended.
checkpoint = ModelCheckpoint(filepath='ckpt.h5', monitor='val_loss', save_best_only=True, save_weights_only=False)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=0.00001, verbose=1)

ContinuedHistory = modelContinue.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=100,
    validation_data=val_generator,
    validation_steps=len(val_generator),
    verbose=1,
    callbacks=[checkpoint, reduce_lr]
)

# Save the model that was just trained. Saving `model` here would write the
# stale model of the earlier cell (or fail when this cell runs in a fresh kernel).
modelContinue.save("full_model.h5")