1.import necessary liabraries

In [1]:
import warnings
warnings.filterwarnings('ignore')

# 核心库
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Tuple, Dict, Any, Optional
import os
import json

# TensorFlow/Keras
try:
    import tensorflow as tf
    from tensorflow.keras import layers, models, Model
    from tensorflow.keras.applications import VGG16, ResNet50, EfficientNetB0
    from tensorflow.keras.optimizers import Adam, SGD, RMSprop
    from tensorflow.keras.regularizers import l1, l2, l1_l2
    from tensorflow.keras.utils import plot_model
    from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
except ImportError as e:
    print(f"TensorFlow import error: {e}")

# 设置
plt.style.use('ggplot')
sns.set_palette("husl")
np.random.seed(42)
tf.random.set_seed(42)

print("Libraries imported successfully!")


2025-09-25 16:49:56.163462: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758790196.184497   10661 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758790196.190646   10661 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-09-25 16:49:56.214900: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Libraries imported successfully!


2. Load Augmented Data

In [2]:
def load_augmented_data(data_path: str = '../data/augmented') -> Tuple:
    """加载增强后的数据"""
    print("Loading augmented data...")
    
    try:
        x_train = np.load(os.path.join(data_path, 'x_train_aug.npy'))
        y_train = np.load(os.path.join(data_path, 'y_train_aug.npy'))
        x_val = np.load(os.path.join(data_path, 'x_val_aug.npy'))
        y_val = np.load(os.path.join(data_path, 'y_val_aug.npy'))
        x_test = np.load(os.path.join(data_path, 'x_test_aug.npy'))
        y_test = np.load(os.path.join(data_path, 'y_test_aug.npy'))
        
        with open(os.path.join(data_path, 'class_names.txt'), 'r', encoding='utf-8') as f:
            class_names = [line.strip() for line in f.readlines()]
        
        # 转换为分类格式
        num_classes = len(class_names)
        y_train_cat = tf.keras.utils.to_categorical(y_train, num_classes)
        y_val_cat = tf.keras.utils.to_categorical(y_val, num_classes)
        y_test_cat = tf.keras.utils.to_categorical(y_test, num_classes)
        
        print("Augmented data loaded successfully!")
        print(f"Training shape: {x_train.shape} -> {y_train_cat.shape}")
        print(f"Validation shape: {x_val.shape} -> {y_val_cat.shape}")
        print(f"Test shape: {x_test.shape} -> {y_test_cat.shape}")
        print(f"Number of classes: {num_classes}")
        
        return (x_train, y_train_cat, x_val, y_val_cat, x_test, y_test_cat, 
                class_names, num_classes)
        
    except FileNotFoundError as e:
        print(f"Error loading data: {e}")
        raise

# 加载数据
(x_train, y_train, x_val, y_val, x_test, y_test, 
 CLASS_NAMES, NUM_CLASSES) = load_augmented_data()

Loading augmented data...
Augmented data loaded successfully!
Training shape: (40000, 32, 32, 3) -> (40000, 10)
Validation shape: (10000, 32, 32, 3) -> (10000, 10)
Test shape: (10000, 32, 32, 3) -> (10000, 10)
Number of classes: 10


3. Basic CNN Architecture

In [3]:
def create_simple_cnn(input_shape: Tuple[int, int, int] = (32, 32, 3), 
                     num_classes: int = 10) -> Model:
    """创建简单的CNN模型"""
    
    model = models.Sequential([
        # 第一个卷积块
        layers.Conv2D(32, (3, 3), activation='relu', padding='same', 
                     input_shape=input_shape),
        layers.BatchNormalization(),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        
        # 第二个卷积块
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        
        # 第三个卷积块
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        
        # 全连接层
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ])
    
    return model

# 创建简单CNN
simple_cnn = create_simple_cnn(input_shape=x_train.shape[1:], num_classes=NUM_CLASSES)
print("Simple CNN created!")
simple_cnn.summary()

I0000 00:00:1758790228.085634   10661 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3600 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6


Simple CNN created!


4. Advanced CNN Architecture

In [4]:
def create_advanced_cnn(input_shape: Tuple[int, int, int] = (32, 32, 3), 
                       num_classes: int = 10) -> Model:
    """创建高级CNN模型（类似VGG风格）"""
    
    inputs = layers.Input(shape=input_shape)
    
    # Block 1
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2))(x)
    x = layers.Dropout(0.3)(x)
    
    # Block 2
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2))(x)
    x = layers.Dropout(0.3)(x)
    
    # Block 3
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2))(x)
    x = layers.Dropout(0.4)(x)
    
    # Block 4
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2), strides=(2, 2))(x)
    x = layers.Dropout(0.4)(x)
    
    # Classifier
    x = layers.Flatten()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(256, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    return model

# 创建高级CNN
advanced_cnn = create_advanced_cnn(input_shape=x_train.shape[1:], num_classes=NUM_CLASSES)
print("Advanced CNN created!")
advanced_cnn.summary()

Advanced CNN created!


5. Transfer Learning Models

In [5]:
def create_transfer_learning_model(base_model_name: str = 'VGG16',
                                  input_shape: Tuple[int, int, int] = (32, 32, 3),
                                  num_classes: int = 10,
                                  trainable_layers: int = 5) -> Model:
    """创建迁移学习模型"""
    
    # 选择基础模型
    if base_model_name == 'VGG16':
        base_model = VGG16(weights='imagenet', include_top=False, 
                          input_shape=input_shape)
    elif base_model_name == 'ResNet50':
        base_model = ResNet50(weights='imagenet', include_top=False, 
                             input_shape=input_shape)
    elif base_model_name == 'EfficientNetB0':
        base_model = EfficientNetB0(weights='imagenet', include_top=False, 
                                   input_shape=input_shape)
    else:
        raise ValueError(f"Unsupported base model: {base_model_name}")
    
    # 冻结基础模型层
    base_model.trainable = False
    
    # 解冻最后几层进行微调
    for layer in base_model.layers[-trainable_layers:]:
        layer.trainable = True
    
    # 添加自定义分类头
    inputs = layers.Input(shape=input_shape)
    x = base_model(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    return model, base_model

# 创建迁移学习模型
vgg_model, vgg_base = create_transfer_learning_model('VGG16', x_train.shape[1:], NUM_CLASSES)
print("VGG16 Transfer Learning model created!")
vgg_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 0us/step
VGG16 Transfer Learning model created!


6. Lightweight CNN for Comparison

In [6]:
def create_lightweight_cnn(input_shape: Tuple[int, int, int] = (32, 32, 3),
                          num_classes: int = 10) -> Model:
    """创建轻量级CNN模型"""
    
    model = models.Sequential([
        # 第一个块 - 深度可分离卷积
        layers.SeparableConv2D(32, (3, 3), activation='relu', padding='same', 
                              input_shape=input_shape),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.2),
        
        # 第二个块
        layers.SeparableConv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.3),
        
        # 第三个块
        layers.SeparableConv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.4),
        
        # 输出层
        layers.Dense(num_classes, activation='softmax')
    ])
    
    return model

# 创建轻量级CNN
lightweight_cnn = create_lightweight_cnn(input_shape=x_train.shape[1:], num_classes=NUM_CLASSES)
print("Lightweight CNN created!")
lightweight_cnn.summary()


Lightweight CNN created!


7. Model Compilation Functions

In [7]:
def compile_model(model: Model, 
                 optimizer: str = 'adam',
                 learning_rate: float = 0.001,
                 metrics: List[str] = ['accuracy']) -> Model:
    """编译模型"""
    
    if optimizer == 'adam':
        opt = Adam(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        opt = SGD(learning_rate=learning_rate, momentum=0.9)
    elif optimizer == 'rmsprop':
        opt = RMSprop(learning_rate=learning_rate)
    else:
        opt = Adam(learning_rate=learning_rate)
    
    model.compile(
        optimizer=opt,
        loss='categorical_crossentropy',
        metrics=metrics
    )
    
    return model

def create_optimizer_configurations() -> List[Dict[str, Any]]:
    """创建不同的优化器配置"""
    
    configurations = [
        {'name': 'Adam_default', 'optimizer': 'adam', 'lr': 0.001},
        {'name': 'Adam_low_lr', 'optimizer': 'adam', 'lr': 0.0001},
        {'name': 'SGD_momentum', 'optimizer': 'sgd', 'lr': 0.01},
        {'name': 'SGD_low_lr', 'optimizer': 'sgd', 'lr': 0.001},
        {'name': 'RMSprop', 'optimizer': 'rmsprop', 'lr': 0.001}
    ]
    
    return configurations

# 测试编译
test_model = create_simple_cnn(x_train.shape[1:], NUM_CLASSES)
compile_model(test_model, 'adam', 0.001)
print("Model compilation tested successfully!")


Model compilation tested successfully!


8. Model Visualization

In [8]:
def visualize_model_architecture(model: Model, model_name: str) -> None:
    """可视化模型架构"""
    
    # 打印模型摘要
    print(f"\n{model_name} Architecture:")
    print("=" * 50)
    model.summary()
    
    # 尝试绘制模型图
    try:
        plot_model(model, to_file=f'../models/{model_name}_architecture.png', 
                  show_shapes=True, show_layer_names=True)
        print(f"Model architecture saved as {model_name}_architecture.png")
    except Exception as e:
        print(f"Could not plot model: {e}")

# 创建模型目录
os.makedirs('../models', exist_ok=True)

# 可视化模型架构
visualize_model_architecture(simple_cnn, "Simple_CNN")
visualize_model_architecture(advanced_cnn, "Advanced_CNN")
visualize_model_architecture(vgg_model, "VGG16_Transfer")


Simple_CNN Architecture:


You must install pydot (`pip install pydot`) for `plot_model` to work.
Model architecture saved as Simple_CNN_architecture.png

Advanced_CNN Architecture:


You must install pydot (`pip install pydot`) for `plot_model` to work.
Model architecture saved as Advanced_CNN_architecture.png

VGG16_Transfer Architecture:


You must install pydot (`pip install pydot`) for `plot_model` to work.
Model architecture saved as VGG16_Transfer_architecture.png


9. Model Complexity Analysis

In [9]:
def analyze_model_complexity(model: Model) -> Dict[str, Any]:
    """分析模型复杂度"""
    
    trainable_params = np.sum([tf.keras.backend.count_params(w) for w in model.trainable_weights])
    non_trainable_params = np.sum([tf.keras.backend.count_params(w) for w in model.non_trainable_weights])
    total_params = trainable_params + non_trainable_params
    
    # 计算层数
    num_layers = len(model.layers)
    
    # 计算模型大小（近似）
    model_size_mb = (total_params * 4) / (1024 * 1024)  # 假设32位浮点数
    
    complexity_info = {
        'trainable_params': trainable_params,
        'non_trainable_params': non_trainable_params,
        'total_params': total_params,
        'num_layers': num_layers,
        'model_size_mb': model_size_mb,
        'trainable_percentage': (trainable_params / total_params) * 100
    }
    
    return complexity_info

def compare_model_complexities(models_dict: Dict[str, Model]) -> None:
    """比较不同模型的复杂度"""
    
    print("Model Complexity Comparison:")
    print("=" * 80)
    print(f"{'Model Name':<20} {'Trainable Params':<15} {'Total Params':<15} "
          f"{'Layers':<8} {'Size (MB)':<10} {'Trainable %':<12}")
    print("-" * 80)
    
    complexities = {}
    for name, model in models_dict.items():
        complexity = analyze_model_complexity(model)
        complexities[name] = complexity
        
        print(f"{name:<20} {complexity['trainable_params']:<15,} "
              f"{complexity['total_params']:<15,} {complexity['num_layers']:<8} "
              f"{complexity['model_size_mb']:<10.2f} {complexity['trainable_percentage']:<12.1f}")

# 比较模型复杂度
models_to_compare = {
    'Simple_CNN': simple_cnn,
    'Advanced_CNN': advanced_cnn,
    'Lightweight_CNN': lightweight_cnn,
    'VGG16_Transfer': vgg_model
}

compare_model_complexities(models_to_compare)

Model Complexity Comparison:
Model Name           Trainable Params Total Params    Layers   Size (MB)  Trainable % 
--------------------------------------------------------------------------------
Simple_CNN           846,698         847,658         22       3.23       99.9        
Advanced_CNN         8,824,650       8,831,050       36       33.69      99.9        
Lightweight_CNN      13,189          13,637          13       0.05       96.7        
VGG16_Transfer       7,245,962       14,882,250      9        56.77      48.7        


10. Custom Model Configurations

In [10]:
def create_custom_model(config: Dict[str, Any]) -> Model:
    """根据配置创建自定义模型"""
    
    input_shape = config.get('input_shape', (32, 32, 3))
    num_classes = config.get('num_classes', 10)
    
    model = models.Sequential()
    model.add(layers.Input(shape=input_shape))
    
    # 添加卷积块
    for i, conv_config in enumerate(config['conv_blocks']):
        filters = conv_config['filters']
        kernel_size = conv_config['kernel_size']
        repetitions = conv_config.get('repetitions', 1)
        
        for j in range(repetitions):
            model.add(layers.Conv2D(filters, kernel_size, 
                                  activation='relu', 
                                  padding='same'))
            if conv_config.get('batch_norm', True):
                model.add(layers.BatchNormalization())
        
        model.add(layers.MaxPooling2D((2, 2)))
        dropout_rate = conv_config.get('dropout', 0.25)
        if dropout_rate > 0:
            model.add(layers.Dropout(dropout_rate))
    
    # 添加全连接层
    model.add(layers.Flatten())
    
    for dense_units in config.get('dense_layers', [256, 128]):
        model.add(layers.Dense(dense_units, activation='relu'))
        model.add(layers.Dropout(0.5))
    
    model.add(layers.Dense(num_classes, activation='softmax'))
    
    return model

# 自定义配置示例
custom_config = {
    'input_shape': x_train.shape[1:],
    'num_classes': NUM_CLASSES,
    'conv_blocks': [
        {'filters': 32, 'kernel_size': (3, 3), 'repetitions': 2, 'dropout': 0.2},
        {'filters': 64, 'kernel_size': (3, 3), 'repetitions': 2, 'dropout': 0.3},
        {'filters': 128, 'kernel_size': (3, 3), 'repetitions': 2, 'dropout': 0.4}
    ],
    'dense_layers': [512, 256]
}

custom_model = create_custom_model(custom_config)
print("Custom model created!")
custom_model.summary()

Custom model created!


11. Save Model Architectures

In [11]:
def save_model_architecture(model: Model, model_name: str, 
                           model_dir: str = '../models') -> None:
    """保存模型架构"""
    
    os.makedirs(model_dir, exist_ok=True)
    
    # 保存模型配置为JSON
    config = model.get_config()
    with open(os.path.join(model_dir, f'{model_name}_config.json'), 'w') as f:
        json.dump(config, f, indent=2)
    
    # 保存模型摘要
    with open(os.path.join(model_dir, f'{model_name}_summary.txt'), 'w') as f:
        model.summary(print_fn=lambda x: f.write(x + '\n'))
    
    print(f"Model architecture saved for {model_name}")

# 保存所有模型架构
models_dict = {
    'simple_cnn': simple_cnn,
    'advanced_cnn': advanced_cnn,
    'lightweight_cnn': lightweight_cnn,
    'vgg16_transfer': vgg_model,
    'custom_model': custom_model
}

for name, model in models_dict.items():
    save_model_architecture(model, name)

Model architecture saved for simple_cnn


Model architecture saved for advanced_cnn


Model architecture saved for lightweight_cnn


Model architecture saved for vgg16_transfer


Model architecture saved for custom_model


12. Summary Report

In [12]:
def generate_model_building_summary() -> None:
    """生成模型构建总结报告"""
    
    print("=" * 60)
    print("MODEL BUILDING SUMMARY REPORT")
    print("=" * 60)
    
    print(f"Input shape: {x_train.shape[1:]}")
    print(f"Number of classes: {NUM_CLASSES}")
    print(f"Models created: {len(models_dict)}")
    
    print(f"\nModel types:")
    print("  - Simple CNN (3 convolutional blocks)")
    print("  - Advanced CNN (VGG-style, 4 blocks)")
    print("  - Lightweight CNN (depthwise separable)")
    print("  - VGG16 Transfer Learning")
    print("  - Custom configurable model")
    
    print(f"\nArchitecture features:")
    print("  - Batch Normalization")
    print("  - Dropout regularization")
    print("  - L2 regularization (advanced model)")
    print("  - Global Average Pooling")
    
    print(f"\nModels saved to: ../models/")
    print("=" * 60)

# 生成总结
generate_model_building_summary()

print("Model building completed successfully!")

MODEL BUILDING SUMMARY REPORT
Input shape: (32, 32, 3)
Number of classes: 10
Models created: 5

Model types:
  - Simple CNN (3 convolutional blocks)
  - Advanced CNN (VGG-style, 4 blocks)
  - Lightweight CNN (depthwise separable)
  - VGG16 Transfer Learning
  - Custom configurable model

Architecture features:
  - Batch Normalization
  - Dropout regularization
  - L2 regularization (advanced model)
  - Global Average Pooling

Models saved to: ../models/
Model building completed successfully!
