# 生成混合模型的输入

In [70]:
import os
import shutil

from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *


from keras.applications.inception_v3 import InceptionV3, preprocess_input as inception_v3_preprocess_input
from keras.applications.xception import Xception, preprocess_input as xception_preprocess_input
# from keras.applications.resnet50 import ResNet50, preprocess_input as resnet50_preprocess_input


import h5py
import math

In [71]:
# dir = "/ext/Data/distracted_driver_detection/"
dir = "E:\\JupyterWorkSpace\\BBBBBBS"

resnet50_weight_file = "resnet50-imagenet-finetune152.h5"
xception_weight_file = "xception-imagenet-finetune116.h5"
inceptionV3_weight_file = "inceptionV3-imagenet-finetune172.h5"


### 正常过程输出？

In [72]:
def normal_preprocess_input(x):
    x /= 255.
    x -= 0.5
    x *= 2
    return x

### 训练集，验证集

In [73]:
def write_gap(tag, MODEL, weight_file, image_size, lambda_func=None, featurewise_std_normalization=True):
    input_tensor = Input((*image_size, 3))
    x = input_tensor
    if lambda_func:
        x = Lambda(lambda_func)(x)
    base_model = MODEL(input_tensor=x, weights=None, include_top=False)

    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))
    model.load_weights(os.path.join("models", weight_file), by_name=True)

    print(MODEL.__name__)
    train_gen = ImageDataGenerator(
        featurewise_std_normalization=featurewise_std_normalization,
        samplewise_std_normalization=False,
        rotation_range=10.,
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.1,
        zoom_range=0.1,
    )
    gen = ImageDataGenerator(
        featurewise_std_normalization=featurewise_std_normalization,
        samplewise_std_normalization=False,
    )

    batch_size = 64
    train_generator = train_gen.flow_from_directory(os.path.join(dir, 'train'), target_size=image_size, shuffle=False, batch_size=batch_size)
    print("subdior to train type {}".format(train_generator.class_indices))
    valid_generator = gen.flow_from_directory(os.path.join(dir, 'valid'), target_size=image_size, shuffle=False, batch_size=batch_size)
    print("subdior to valid type {}".format(valid_generator.class_indices))

    print("predict_generator train {}".format(math.ceil(train_generator.samples // batch_size + 1)))
    train = model.predict(train_generator, steps=math.ceil(train_generator.samples // batch_size + 1))
    print("train: {}".format(train.shape))
    print("predict_generator valid {}".format(math.ceil(valid_generator.samples // batch_size + 1)))
    valid = model.predict(valid_generator, steps=math.ceil(valid_generator.samples // batch_size + 1))
    print("valid: {}".format(valid.shape))

    print("begin create database {}".format(MODEL.__name__))
    with h5py.File(os.path.join("models", tag, "bottleneck_{}.h5".format(MODEL.__name__)), 'w') as h:
        h.create_dataset("train", data=train)
        h.create_dataset("valid", data=valid)
        h.create_dataset("label", data=train_generator.classes)
        h.create_dataset("valid_label", data=valid_generator.classes)
    print("Data saved to bottleneck_{}.h5 successfully.".format(MODEL.__name__))


### 测试集

In [82]:
def write_gap_test(tag, MODEL, weight_file, image_size, lambda_func=None, featurewise_std_normalization=True):
    # 输入张量
    input_tensor = Input((*image_size, 3))
    x = input_tensor
    if lambda_func:
        x = Lambda(lambda_func)(x)  # 使用lambda函数进行额外的预处理
    # 加载基本模型，不包括顶部的全连接层
    base_model = MODEL(input_tensor=x, weights=None, include_top=False)
    # 构建最终的模型，使用全局平均池化
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))
    # 加载权重文件
    model.load_weights("models/" + weight_file, by_name=True)

    print(MODEL.__name__)
    
    # 定义测试数据的生成器
    gen = ImageDataGenerator(
        featurewise_std_normalization=featurewise_std_normalization,
        samplewise_std_normalization=False,
    )
    
    batch_size = 64
    # 加载测试数据
    test_generator = gen.flow_from_directory(
        os.path.join(dir, 'test'),
        target_size=image_size,
        shuffle=False,
        batch_size=batch_size,
        class_mode=None  # 不使用标签
    )
    
    print("predict_generator test {}".format(math.ceil(test_generator.samples // batch_size + 1)))
    
    # 预测测试数据
    test = model.predict(test_generator, steps=math.ceil(test_generator.samples // batch_size + 1))
    print("test: {}".format(test.shape))

    # 将预测结果保存到文件
    print("begin create database {}".format(MODEL.__name__))
    with h5py.File(os.path.join("models", tag, "bottleneck_{}_test.h5".format(MODEL.__name__)),'w') as h:
        h.create_dataset("test", data=test)
    
    print("write_gap_test {} succeeded".format(MODEL.__name__))

### 函数调用

In [None]:
###
### subdir = noscale
###
tag = "finetune"
print("===== Train & Valid =====")
write_gap(tag, ResNet50, resnet50_weight_file, (240, 320))
write_gap(tag, Xception, xception_weight_file, (320, 480), xception_preprocess_input)
write_gap(tag, InceptionV3, inceptionV3_weight_file, (320, 480), inception_v3_preprocess_input)

In [83]:
print("===== Test =====")
write_gap_test(tag, ResNet50, resnet50_weight_file, (240, 320))
write_gap_test(tag, Xception, xception_weight_file, (320, 480), xception_preprocess_input)
write_gap_test(tag, InceptionV3, inceptionV3_weight_file, (320, 480), inception_v3_preprocess_input)

===== Test =====
ResNet50
Found 79726 images belonging to 1 classes.
predict_generator test 1246
test: (79726, 2048)
begin create database ResNet50
write_gap_test ResNet50 succeeded
Xception
Found 79726 images belonging to 1 classes.
predict_generator test 1246
test: (79726, 2048)
begin create database Xception
write_gap_test Xception succeeded
InceptionV3
Found 79726 images belonging to 1 classes.
predict_generator test 1246
test: (79726, 2048)
begin create database InceptionV3
write_gap_test InceptionV3 succeeded


# 自己测试用的，用于改正

In [38]:
lambda_func=None
tag = "finetune"
MODEL = ResNet50
MODEL1 = Xception
MODEL2 = InceptionV3

weight_file = resnet50_weight_file
weight_file1 = xception_weight_file
weight_file2= inceptionV3_weight_file

image_size = (240, 320)
image_size1 = (320, 480)
image_size2 = (320, 480)

featurewise_std_normalization=True
featurewise_std_normalization1=xception_preprocess_input
featurewise_std_normalization2=inception_v3_preprocess_input


In [24]:
# 模型加载权重文件：ResNet50
input_tensor = Input((*image_size, 3))
x = input_tensor
if lambda_func:
    x = Lambda(lambda_func)(x)
base_model = MODEL(input_tensor=x, weights=None, include_top=False)
model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))
model.load_weights(os.path.join("models", weight_file), by_name=True)

# 当前模型
print(MODEL.__name__)
# 训练数据 生成器
train_gen = ImageDataGenerator(
    featurewise_std_normalization=featurewise_std_normalization,
    samplewise_std_normalization=False,
    rotation_range=10.,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.1,
    zoom_range=0.1,
)
gen = ImageDataGenerator(
    featurewise_std_normalization=featurewise_std_normalization,
    samplewise_std_normalization=False,
)

batch_size = 64
# 加载训练数据
train_generator = train_gen.flow_from_directory(os.path.join(dir, 'train'), target_size=image_size, shuffle=False, batch_size=batch_size)
print("subdior to train type {}".format(train_generator.class_indices))
# 加载验证数据
valid_generator = gen.flow_from_directory(os.path.join(dir, 'valid'), target_size=image_size, shuffle=False, batch_size=batch_size)
print("subdior to valid type {}".format(valid_generator.class_indices))

# 预测训练数据
print("predict_generator train {}".format(math.ceil(train_generator.samples // batch_size + 1)))
train = model.predict(train_generator, steps=math.ceil(train_generator.samples // batch_size + 1))
print("train: {}".format(train.shape))
# 预测测试数据
print("predict_generator valid {}".format(math.ceil(valid_generator.samples // batch_size + 1)))
valid = model.predict(valid_generator, steps=math.ceil(valid_generator.samples // batch_size + 1))
print("valid: {}".format(valid.shape))

# 数据存储在train、valid、train_generator、valid_generator 中

ResNet50
Found 20787 images belonging to 10 classes.
subdior to train type {'c0': 0, 'c1': 1, 'c2': 2, 'c3': 3, 'c4': 4, 'c5': 5, 'c6': 6, 'c7': 7, 'c8': 8, 'c9': 9}
Found 1637 images belonging to 10 classes.
subdior to valid type {'c0': 0, 'c1': 1, 'c2': 2, 'c3': 3, 'c4': 4, 'c5': 5, 'c6': 6, 'c7': 7, 'c8': 8, 'c9': 9}
predict_generator train 325
train: (20787, 2048)
predict_generator valid 26
valid: (1637, 2048)


In [29]:
with h5py.File(os.path.join("models", tag, "bottleneck_{}.h5".format(MODEL.__name__)), 'w') as h:
    h.create_dataset("train", data=train)
    h.create_dataset("valid", data=valid)
    h.create_dataset("label", data=train_generator.classes)
    h.create_dataset("valid_label", data=valid_generator.classes)
print("Data saved to bottleneck_{}.h5 successfully.".format(MODEL.__name__))


Data saved to bottleneck_ResNet50.h5 successfully.


In [39]:
# 模型加载权重文件：InceptionV3
input_tensor2 = Input((*image_size2, 3))
x2 = input_tensor2
if lambda_func:
    x = Lambda(lambda_func)(x)
base_model2 = MODEL2(input_tensor=x, weights=None, include_top=False)
model2 = Model(base_model2.input, GlobalAveragePooling2D()(base_model2.output))
model2.load_weights(os.path.join("models", weight_file2), by_name=True)

# 当前模型
print(MODEL2.__name__)
# 训练数据 生成器
train_gen2 = ImageDataGenerator(
    featurewise_std_normalization=featurewise_std_normalization2,
    samplewise_std_normalization=False,
    rotation_range=10.,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.1,
    zoom_range=0.1,
)
gen2 = ImageDataGenerator(
    featurewise_std_normalization=featurewise_std_normalization2,
    samplewise_std_normalization=False,
)

batch_size = 64
# 加载训练数据
train_generator2 = train_gen2.flow_from_directory(os.path.join(dir, 'train'), target_size=image_size2, shuffle=False, batch_size=batch_size)
print("subdior to train type {}".format(train_generator2.class_indices))
# 加载验证数据
valid_generator2 = gen2.flow_from_directory(os.path.join(dir, 'valid'), target_size=image_size2, shuffle=False, batch_size=batch_size)
print("subdior to valid type {}".format(valid_generator2.class_indices))

# 预测训练数据
print("predict_generator train {}".format(math.ceil(train_generator2.samples // batch_size + 1)))
train2 = model.predict(train_generator2, steps=math.ceil(train_generator2.samples // batch_size + 1))
print("train: {}".format(train.shape))
# 预测测试数据
print("predict_generator valid {}".format(math.ceil(valid_generator2.samples // batch_size + 1)))
valid2 = model.predict(valid_generator2, steps=math.ceil(valid_generator2.samples // batch_size + 1))
print("valid: {}".format(valid.shape))

# 数据存储在train、valid、train_generator、valid_generator 中

InceptionV3
Found 20787 images belonging to 10 classes.
subdior to train type {'c0': 0, 'c1': 1, 'c2': 2, 'c3': 3, 'c4': 4, 'c5': 5, 'c6': 6, 'c7': 7, 'c8': 8, 'c9': 9}
Found 1637 images belonging to 10 classes.
subdior to valid type {'c0': 0, 'c1': 1, 'c2': 2, 'c3': 3, 'c4': 4, 'c5': 5, 'c6': 6, 'c7': 7, 'c8': 8, 'c9': 9}
predict_generator train 325
train: (20787, 2048)
predict_generator valid 26
valid: (1637, 2048)


In [42]:
with h5py.File(os.path.join("models", tag, "bottleneck_{}.h5".format(MODEL2.__name__)), 'w') as h2:
    h2.create_dataset("train", data=train2)
    h2.create_dataset("valid", data=valid2)
    h2.create_dataset("label", data=train_generator2.classes)
    h2.create_dataset("valid_label", data=valid_generator2.classes)
print("Data saved to bottleneck_{}.h5 successfully.".format(MODEL2.__name__))


Data saved to bottleneck_InceptionV3.h5 successfully.


In [69]:
"""
比较h5文件的差异
"""
import h5py
import numpy as np
from tensorflow.keras.models import load_model

def compare_h5(file1, file2):
    with h5py.File(os.path.join("models", tag, file1), 'r') as f1,h5py.File(os.path.join("models", tag, file2), 'r') as f2:
        # 比较文件中的所有键（数据集/属性）
        keys1 = set(f1.keys())
        keys2 = set(f2.keys())
        
        # 打印差异
        print("Keys only in file1:", keys1 - keys2)
        print("Keys only in file2:", keys2 - keys1)
        i = 0
        j=0
        # 比较每个数据集的内容
        for key in keys1 & keys2:
            j+=1
            dataset1 = f1[key][...]
            dataset2 = f2[key][...]
#             print("dataset1",dataset1)
#             print("dataset2",dataset2)

            if not (dataset1 == dataset2).all():
                i+=1
                print(f"Difference found in dataset: {key}")
                print(f"File1 {key}:\n", dataset1)
                print(f"File2 {key}:\n", dataset2)
        
        print(j)
        print(i)
compare_h5('bottleneck_InceptionV3.h5', 'bottleneck_Xception.h5')



Keys only in file1: set()
Keys only in file2: set()
Difference found in dataset: valid
File1 valid:
 [[0.2246282  0.09479816 0.22710085 ... 0.69714415 0.15253998 0.19852418]
 [0.12403081 0.49175826 0.13511154 ... 0.35331428 0.10390443 0.21329199]
 [0.19812007 0.33401868 0.1574735  ... 0.32233372 0.12638298 0.2093997 ]
 ...
 [0.34972832 0.6115425  0.36129025 ... 0.36448795 0.08421514 0.14278704]
 [0.27985427 0.5858992  0.32419595 ... 0.3996869  0.23800059 0.07301176]
 [0.59768915 0.10676511 0.09687085 ... 0.41471806 0.5666444  0.36041027]]
File2 valid:
 [[0.4789863  0.06350634 0.29304647 ... 0.01331133 0.13000038 0.05837492]
 [0.36633962 0.22098663 0.2565125  ... 0.07923584 0.08195297 0.10843932]
 [0.41572216 0.4418104  0.47690684 ... 0.05978933 0.149256   0.0111434 ]
 ...
 [0.09337121 0.4757786  0.11805671 ... 0.14525647 0.18383819 0.1445847 ]
 [0.08573153 0.38129157 0.15797186 ... 0.26685232 0.15418164 0.10451419]
 [0.51839924 0.27710393 0.2490445  ... 0.04606501 0.07823636 0.0414037 

In [80]:
"""
对生成test模型的测试,失败
"""
def write_gap_test(tag, MODEL, weight_file, image_size, lambda_func=None, featurewise_std_normalization=True):
    # 输入张量
    input_tensor = Input((*image_size, 3))
    x = input_tensor
    if lambda_func:
        x = Lambda(lambda_func)(x)  # 使用lambda函数进行额外的预处理
    # 加载基本模型，不包括顶部的全连接层
    base_model = MODEL(input_tensor=x, weights=None, include_top=False)
    # 构建最终的模型，使用全局平均池化
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))
    # 加载权重文件
    model.load_weights("models/" + weight_file, by_name=True)

    print(MODEL.__name__)
    
    # 定义测试数据的生成器
    gen = ImageDataGenerator(
        featurewise_std_normalization=featurewise_std_normalization,
        samplewise_std_normalization=False,
    )
    
    batch_size = 64# 加载测试数据
    test_generator = gen.flow_from_directory(
        os.path.join(dir, 'test'),
        target_size=image_size,
        shuffle=False,
        batch_size=batch_size,
        class_mode=None  # 不使用标签
    )

    # 计算预测步骤数
    steps = math.ceil(test_generator.samples / batch_size)

    # 预测测试数据
    test = model.predict(test_generator, steps=steps)
    print("test shape: {}".format(test.shape))

    # 将预测结果保存到文件
    print("begin create database {}".format(MODEL.__name__))
    with h5py.File(os.path.join("models", tag, "bottleneck_{}_test.h5".format(MODEL.__name__)), 'w') as h:
        h.create_dataset("test", data=test)

    print("write_gap_test {} succeeded".format(MODEL.__name__))


In [81]:
write_gap_test("finetune", ResNet50, resnet50_weight_file, (240, 320))


ResNet50
Found 0 images belonging to 0 classes.


ValueError: Asked to retrieve element 0, but the Sequence has length 0