# Re-run Model (best model from experiment)

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from keras import layers, models, regularizers
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import seaborn as sns
import json

# Set random seeds for reproducibility
def set_seed(seed=42):
    """Seed the random number generators used by this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    TensorFlow's global generator, and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Local import: the notebook's import cell does not bring in ``random``.
    import random

    # Hash randomisation is fixed when an interpreter starts, so this only
    # influences processes launched after this call (e.g. worker processes),
    # not the interpreter that is already running.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

# Set a fixed seed (applied once, before any data is loaded or model is built)
SEED = 42
set_seed(SEED)

# Define directories
# Inputs live under data/; models and results of this run are written under
# the experiment directory below.
data_dir = 'data'
synthetic_dir = os.path.join(data_dir, 'synthetic')
maps_dir = os.path.join(data_dir, 'maps')
labels_dir = os.path.join(data_dir, 'labels')
visualizations_dir = os.path.join(data_dir, 'visualizations')
experiment_dir = os.path.join('experiments', 'experiment_20250914_220130')  # Replace with the timestamp of the previous run
model_dir = os.path.join(experiment_dir, 'models')
results_dir = os.path.join(experiment_dir, 'results')
# Only the model and results directories are created here.
os.makedirs(model_dir, exist_ok=True)
os.makedirs(results_dir, exist_ok=True)

# Load saved data
# X_1d and X_2d are two stored representations of the samples; the integer
# class of each sample is read from the "label" column of labels.csv.
# NOTE(review): the three files are presumably row-aligned — confirm.
X_1d = np.load(os.path.join(synthetic_dir, "synthetic_1d.npy"))
X_2d = np.load(os.path.join(maps_dir, "spectral_maps_gadf.npy"))
labels_df = pd.read_csv(os.path.join(labels_dir, "labels.csv"))
y = labels_df["label"].values

# Quick sanity report of array shapes and per-class sample counts.
print("X_1d shape:", X_1d.shape)
print("X_2d shape:", X_2d.shape)
print("labels_df shape:", labels_df.shape)
print("Label distribution:\n", labels_df["label"].value_counts())

# Define baseline model (for completeness, in case needed)
def create_baseline_model(input_shape=880):
    """Build the uncompiled 1D convolutional baseline model.

    Args:
        input_shape: Number of points in each flat input vector.

    Returns:
        A ``Sequential`` model that reshapes the vector to
        ``(input_shape, 1)``, applies one Conv1D + average-pooling stage and
        a 100-unit dense layer, and ends in a 2-unit sigmoid layer.
    """
    stack = [
        layers.Input(shape=(input_shape,)),
        # Conv1D needs an explicit channel axis.
        layers.Reshape((input_shape, 1)),
        layers.Conv1D(filters=16, kernel_size=5, strides=1, activation='relu'),
        layers.AveragePooling1D(pool_size=2, strides=2),
        layers.Flatten(),
        layers.Dense(100, activation='relu'),
        layers.Dense(2, activation='sigmoid'),
    ]
    return models.Sequential(stack)

def train_baseline_model(baseline_model, noise_data, epochs=10, batch_size=32):
    """Fit ``baseline_model`` on the noisy traces in ``noise_data`` and save its weights.

    Labels are read from ``labels_noise_pure_182.npy`` under ``data_dir``.
    If that fails for any reason, random 0/1 labels are used instead so the
    function still produces a weights file (best-effort fallback).

    Args:
        baseline_model: Keras model to compile (Adam, sparse categorical
            cross-entropy) and fit.
        noise_data: Array indexed as ``noise_data[i, 0, 1, :, 0]`` to obtain
            the noisy trace of sample ``i``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        The same model instance after fitting. Its weights are also written
        to ``model.weights.h5`` under ``data_dir``.

    Raises:
        ValueError: If fewer labels than samples are available.
    """
    n_samples = noise_data.shape[0]
    try:
        labels = np.load(os.path.join(data_dir, 'labels_noise_pure_182.npy'))
        print("Đã tải nhãn từ labels_noise_pure_182.npy thành công!")
    except Exception as e:
        print(f"Lỗi khi tải nhãn: {e}. Sử dụng nhãn ngẫu nhiên.")
        labels = np.random.randint(0, 2, size=n_samples)

    labels = np.asarray(labels)
    if labels.ndim == 0 or labels.shape[0] < n_samples:
        raise ValueError(
            f"Expected at least {n_samples} labels, got "
            f"{labels.shape[0] if labels.ndim else 0}."
        )

    # One slice replaces the per-sample loop: take the noisy trace of every
    # sample, then append a trailing channel axis.
    # NOTE(review): create_baseline_model declares a rank-2 input and adds
    # the channel axis itself; the extra axis is kept here to preserve the
    # array shape this function has always passed to fit() — confirm the
    # installed Keras version accepts it.
    X = noise_data[:, 0, 1, :, 0][:, :, np.newaxis]
    # Only the first n_samples labels are used, one per sample.
    y = labels[:n_samples]

    baseline_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    baseline_model.fit(X, y, epochs=epochs, batch_size=batch_size, validation_split=0.1)
    baseline_model.save_weights(os.path.join(data_dir, 'model.weights.h5'))
    return baseline_model

# Load noise data (if needed for baseline model training)
def load_noise_data():
    """Load the array stored in ``dataset_noise_pure_182.npy`` under ``data_dir``.

    Returns:
        The loaded array, or an empty array when loading fails for any
        reason (the error is printed rather than raised).
    """
    try:
        loaded = np.load(os.path.join(data_dir, 'dataset_noise_pure_182.npy'))
    except Exception as e:
        print(f"Lỗi khi tải dữ liệu nhiễu: {e}")
        loaded = np.array([])
    return loaded

# Load or train baseline model
# Prefer previously saved weights; if they cannot be loaded for any reason,
# fall back to training the baseline from the noise dataset.
baseline_model = create_baseline_model(input_shape=880)
try:
    baseline_model.load_weights(os.path.join(data_dir, 'model.weights.h5'))
    # Message: "Baseline model weights loaded successfully!"
    print("Trọng số mô hình baseline đã được tải thành công!")
except Exception as e:
    # Message: "Error loading weights: ..."
    print(f"Lỗi khi tải trọng số: {e}. Training baseline model.")
    noise_data = load_noise_data()
    # load_noise_data() signals failure with an empty array, not an exception.
    if noise_data.size == 0:
        # Message: "Unable to load noise data."
        raise FileNotFoundError("Không thể tải dữ liệu nhiễu.")
    baseline_model = train_baseline_model(baseline_model, noise_data)

# Define DenseNet models
def build_1d_densenet(input_shape=(880, 1), num_classes=11, growth_rate=12):
    """Build an uncompiled DenseNet-style classifier for 1D inputs.

    The network is a 48-filter stem (Conv1D, batch norm, max pooling),
    followed by three stages. Each stage is a dense block that concatenates
    four new ``growth_rate``-channel feature maps onto its input, then a
    transition that halves the channel count (integer division) with a 1x1
    convolution and halves the length with max pooling. A global-average
    pooled head (Dense 128, Dropout 0.4) feeds the softmax output.

    Args:
        input_shape: Shape of one sample, ``(length, channels)``.
        num_classes: Number of softmax outputs.
        growth_rate: Filters added by every layer of a dense block.

    Returns:
        A functional ``Model`` mapping the input to class probabilities.
    """
    weight_decay = 0.0005

    def bn_relu(tensor):
        # Pre-activation pair used before every convolution after the stem.
        tensor = layers.BatchNormalization()(tensor)
        return layers.Activation('relu')(tensor)

    def conv(tensor, channels, width, **extra):
        # Same-padded, L2-regularised convolution.
        return layers.Conv1D(channels, width, padding='same',
                             kernel_regularizer=regularizers.l2(weight_decay),
                             **extra)(tensor)

    net_in = layers.Input(shape=input_shape)

    # Stem
    feat = conv(net_in, 48, 7, activation='relu')
    feat = layers.BatchNormalization()(feat)
    feat = layers.MaxPooling1D(pool_size=2)(feat)

    for _stage in range(3):
        # Dense block: every new feature map sees all earlier ones.
        for _layer in range(4):
            branch = conv(bn_relu(feat), growth_rate, 3)
            feat = layers.Concatenate()([feat, branch])
        # Transition: compress channels, then downsample.
        channels = feat.shape[-1]
        feat = conv(bn_relu(feat), channels // 2, 1)
        feat = layers.MaxPooling1D(pool_size=2)(feat)

    # Classification head
    feat = bn_relu(feat)
    feat = layers.GlobalAveragePooling1D()(feat)
    feat = layers.Dense(128, activation='relu')(feat)
    feat = layers.Dropout(0.4)(feat)
    probs = layers.Dense(num_classes, activation='softmax')(feat)
    return models.Model(net_in, probs)

def build_2d_densenet(input_shape=(64, 64, 1), num_classes=11, growth_rate=12):
    """Build an uncompiled DenseNet-style classifier for 2D inputs.

    The network is a 48-filter 3x3 stem (Conv2D, batch norm, 2x2 max
    pooling), followed by three stages. Each stage is a dense block that
    concatenates four new ``growth_rate``-channel feature maps onto its
    input, then a transition that halves the channel count (integer
    division) with a 1x1 convolution and applies 2x2 max pooling. A
    global-average pooled head (Dense 128, Dropout 0.4) feeds the softmax
    output.

    Args:
        input_shape: Shape of one sample, ``(height, width, channels)``.
        num_classes: Number of softmax outputs.
        growth_rate: Filters added by every layer of a dense block.

    Returns:
        A functional ``Model`` mapping the input to class probabilities.
    """
    weight_decay = 0.0005

    def bn_relu(tensor):
        # Pre-activation pair used before every convolution after the stem.
        tensor = layers.BatchNormalization()(tensor)
        return layers.Activation('relu')(tensor)

    def conv(tensor, channels, size, **extra):
        # Same-padded, L2-regularised convolution.
        return layers.Conv2D(channels, size, padding='same',
                             kernel_regularizer=regularizers.l2(weight_decay),
                             **extra)(tensor)

    net_in = layers.Input(shape=input_shape)

    # Stem
    feat = conv(net_in, 48, 3, activation='relu')
    feat = layers.BatchNormalization()(feat)
    feat = layers.MaxPooling2D(pool_size=(2, 2))(feat)

    for _stage in range(3):
        # Dense block: every new feature map sees all earlier ones.
        for _layer in range(4):
            branch = conv(bn_relu(feat), growth_rate, 3)
            feat = layers.Concatenate()([feat, branch])
        # Transition: compress channels, then downsample.
        channels = feat.shape[-1]
        feat = conv(bn_relu(feat), channels // 2, 1)
        feat = layers.MaxPooling2D(pool_size=(2, 2))(feat)

    # Classification head
    feat = bn_relu(feat)
    feat = layers.GlobalAveragePooling2D()(feat)
    feat = layers.Dense(128, activation='relu')(feat)
    feat = layers.Dropout(0.4)(feat)
    probs = layers.Dense(num_classes, activation='softmax')(feat)
    return models.Model(net_in, probs)

# Define ResNet models
def build_1d_resnet(input_shape=(880, 1), num_classes=11):
    """Build an uncompiled ResNet-style classifier for 1D inputs.

    The network is a 64-filter stem (Conv1D width 5, batch norm, max
    pooling), two 64-filter residual blocks, a max pooling step, three
    128-filter residual blocks, then global average pooling, Dense 128,
    Dropout 0.3 and a softmax output. Inside a block both convolutions
    apply ReLU before their batch norm. The skip path gets a 1x1 convolution
    only when the channel count changes.

    Args:
        input_shape: Shape of one sample, ``(length, channels)``.
        num_classes: Number of softmax outputs.

    Returns:
        A functional ``Model`` mapping the input to class probabilities.
    """
    def conv_bn(tensor, channels, width):
        # ReLU convolution (L2-regularised) followed by batch norm.
        tensor = layers.Conv1D(channels, width, padding='same', activation='relu',
                               kernel_regularizer=regularizers.l2(0.0001))(tensor)
        return layers.BatchNormalization()(tensor)

    def residual_block(tensor, filters, kernel_size=3):
        main = conv_bn(tensor, filters, kernel_size)
        main = conv_bn(main, filters, kernel_size)
        skip = tensor
        if skip.shape[-1] != filters:
            # Project the skip path so both branches have matching channels.
            skip = layers.Conv1D(filters, 1, padding='same')(skip)
        merged = layers.Add()([skip, main])
        return layers.Activation('relu')(merged)

    net_in = layers.Input(shape=input_shape)

    # Stem
    feat = conv_bn(net_in, 64, 5)
    feat = layers.MaxPooling1D(pool_size=2)(feat)

    for _ in range(2):
        feat = residual_block(feat, 64)
    feat = layers.MaxPooling1D(pool_size=2)(feat)
    for _ in range(3):
        feat = residual_block(feat, 128)

    # Classification head
    feat = layers.GlobalAveragePooling1D()(feat)
    feat = layers.Dense(128, activation='relu')(feat)
    feat = layers.Dropout(0.3)(feat)
    probs = layers.Dense(num_classes, activation='softmax')(feat)
    return models.Model(net_in, probs)

def build_2d_resnet(input_shape=(64, 64, 1), num_classes=11):
    """Build an uncompiled ResNet-style classifier for 2D inputs.

    The network is a 32-filter 3x3 stem (Conv2D, batch norm, 2x2 max
    pooling), two 32-filter residual blocks, another 2x2 max pooling, then
    global average pooling, Dense 128, Dropout 0.5 and a softmax output.
    Inside a block only the first convolution is ReLU-activated and
    L2-regularised; the second is a plain convolution. The skip path gets a
    1x1 convolution only when the channel count changes.

    Args:
        input_shape: Shape of one sample, ``(height, width, channels)``.
        num_classes: Number of softmax outputs.

    Returns:
        A functional ``Model`` mapping the input to class probabilities.
    """
    def relu_conv(tensor, channels, size):
        # ReLU convolution with L2 weight decay.
        return layers.Conv2D(channels, size, padding='same', activation='relu',
                             kernel_regularizer=regularizers.l2(0.0005))(tensor)

    def residual_block(tensor, filters, kernel_size=3):
        main = relu_conv(tensor, filters, kernel_size)
        main = layers.BatchNormalization()(main)
        main = layers.Conv2D(filters, kernel_size, padding='same')(main)
        main = layers.BatchNormalization()(main)
        skip = tensor
        if skip.shape[-1] != filters:
            # Project the skip path so both branches have matching channels.
            skip = layers.Conv2D(filters, 1, padding='same')(skip)
        merged = layers.Add()([skip, main])
        return layers.Activation('relu')(merged)

    net_in = layers.Input(shape=input_shape)

    # Stem
    feat = relu_conv(net_in, 32, 3)
    feat = layers.BatchNormalization()(feat)
    feat = layers.MaxPooling2D(pool_size=(2, 2))(feat)

    for _ in range(2):
        feat = residual_block(feat, 32)
    feat = layers.MaxPooling2D(pool_size=(2, 2))(feat)

    # Classification head
    feat = layers.GlobalAveragePooling2D()(feat)
    feat = layers.Dense(128, activation='relu')(feat)
    feat = layers.Dropout(0.5)(feat)
    probs = layers.Dense(num_classes, activation='softmax')(feat)
    return models.Model(net_in, probs)

# Plot confusion matrix (updated version)
def plot_confusion_matrix(y_true, y_pred, title, filename):
    """Render a confusion matrix as a heatmap and save it as a PNG.

    The matrix always has one row/column for each of the 11 classes, even
    when a class is absent from ``y_true`` and ``y_pred``, so the tick labels
    stay aligned with the cells. Cells hold raw counts, printed with two
    decimals.

    Args:
        y_true: Ground-truth integer class labels.
        y_pred: Predicted integer class labels.
        title: Figure title.
        filename: File name (not a path) created inside
            ``visualizations_dir``.
    """
    n_classes = 11
    # Pin the label set: without it, confusion_matrix only covers the classes
    # that occur in the data and the fixed tick labels no longer match.
    cm = confusion_matrix(y_true, y_pred, labels=np.arange(n_classes))
    tick_labels = [f"{i*10}% Ethanol" for i in range(n_classes)]

    # The output directory is not created anywhere else in this notebook.
    os.makedirs(visualizations_dir, exist_ok=True)

    fig = plt.figure(figsize=(12, 10))
    # Guard the colour scale against an all-zero matrix (vmin == vmax).
    colour_norm = plt.Normalize(vmin=0, vmax=max(int(np.max(cm)), 1))
    sns.heatmap(cm, annot=True, fmt='.2f', cmap='viridis', cbar=True,
                xticklabels=tick_labels,
                yticklabels=tick_labels,
                annot_kws={"size": 10}, norm=colour_norm)
    plt.title(title, fontsize=14, pad=15)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.ylabel('True Label', fontsize=12)
    plt.savefig(os.path.join(visualizations_dir, filename), dpi=300, bbox_inches='tight')
    # Close this specific figure so repeated calls do not accumulate figures.
    plt.close(fig)

# Data augmentation
# A Lambda layer wrapping a one-argument function runs identically in fit,
# evaluate and predict, which would perturb validation and test inputs too.
# The augmentation is therefore implemented as a layer that honours the
# ``training`` flag and is the identity outside training.
@keras.utils.register_keras_serializable(package="spectral_augmentation")
class TrainingOnlyAugmentation(layers.Layer):
    """Random noise, global rescaling and a small roll, applied only while training.

    Args:
        noise_stddev: Standard deviation of the additive Gaussian noise.
        scale_range: ``(low, high)`` bounds of the single random factor that
            multiplies the whole batch.
        max_shift: The batch is rolled along axis 1 by a random integer drawn
            from ``[-max_shift, max_shift)``.
    """

    def __init__(self, noise_stddev=0.05, scale_range=(0.8, 1.2), max_shift=5, **kwargs):
        super().__init__(**kwargs)
        self.noise_stddev = noise_stddev
        self.scale_range = tuple(scale_range)
        self.max_shift = max_shift

    def call(self, inputs, training=None):
        # NOTE(review): assumes ``training`` arrives as a Python bool/None
        # (as in Keras 3) — confirm for the installed Keras version.
        if not training:
            return inputs
        noisy = inputs + tf.random.normal(
            tf.shape(inputs), mean=0.0, stddev=self.noise_stddev, dtype=inputs.dtype)
        low, high = self.scale_range
        scaled = noisy * tf.random.uniform((), low, high, dtype=inputs.dtype)
        shift = tf.random.uniform((), -self.max_shift, self.max_shift, dtype=tf.int32)
        return tf.roll(scaled, shift=shift, axis=1)

    def compute_output_shape(self, input_shape):
        # Every operation above preserves the input shape.
        return input_shape

    def get_config(self):
        config = super().get_config()
        config.update({
            "noise_stddev": self.noise_stddev,
            "scale_range": list(self.scale_range),
            "max_shift": self.max_shift,
        })
        return config


# Both pipelines use the same settings: stddev 0.05, scale in [0.8, 1.2),
# roll along axis 1 by a shift in [-5, 5).
data_augmentation_1d = models.Sequential([TrainingOnlyAugmentation()])
data_augmentation_2d = models.Sequential([TrainingOnlyAugmentation()])

# Split data
# One call shuffles once (seed 42) and applies the same 80/20 partition to
# both input representations and the labels, keeping them row-aligned.
(X_1d_train, X_1d_test,
 X_2d_train, X_2d_test,
 y_train, y_test) = train_test_split(X_1d, X_2d, y, test_size=0.2, random_state=42)
# Label aliases used by the 2D training and evaluation code.
y_train_2d, y_test_2d = y_train, y_test

# Compute class weights
# 'balanced' weights for the 11 classes, keyed by class index as Keras expects.
# NOTE(review): weights are derived from the full label array `y`, which
# includes the held-out test labels — confirm this is intended.
class_weights = compute_class_weight('balanced', classes=np.arange(11), y=y)
class_weight = dict(enumerate(class_weights))

# Train models with fresh optimizers
# Each of the four models below follows the same recipe: reset Keras state,
# create a cosine-decay Adam optimizer, prepend the augmentation pipeline to
# a freshly built network, compile, and fit for 10 epochs with the last 10%
# of the training split held out for validation. The best checkpoint (by the
# ModelCheckpoint default monitor) is written to model_dir.
# The same EarlyStopping instance is passed to all four fit() calls.
# NOTE(review): patience (10) equals the epoch budget (10), so early stopping
# looks unable to trigger within a run — confirm this is intended.
early_stopping = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

# DenseNet 1D
tf.keras.backend.clear_session()
# NOTE(review): decay_steps is derived from the full training array length,
# while validation_split=0.1 removes 10% of it from the optimisation steps —
# the schedule horizon is presumably slightly longer than the actual run.
lr_schedule_1d_densenet = keras.optimizers.schedules.CosineDecay(initial_learning_rate=0.001, decay_steps=10*len(X_1d_train)//64)
optimizer_1d_densenet = keras.optimizers.Adam(learning_rate=lr_schedule_1d_densenet)
densenet_1d = models.Sequential([data_augmentation_1d, build_1d_densenet()])
densenet_1d.compile(optimizer=optimizer_1d_densenet, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
densenet_1d.fit(X_1d_train, y_train, validation_split=0.1, epochs=10, batch_size=64,
                callbacks=[keras.callbacks.ModelCheckpoint(os.path.join(model_dir, "best_densenet_1d.keras"), save_best_only=True), early_stopping],
                class_weight=class_weight)

# DenseNet 2D (batch size 32; decay_steps is computed with the same divisor)
tf.keras.backend.clear_session()
lr_schedule_2d_densenet = keras.optimizers.schedules.CosineDecay(initial_learning_rate=0.001, decay_steps=10*len(X_2d_train)//32)
optimizer_2d_densenet = keras.optimizers.Adam(learning_rate=lr_schedule_2d_densenet)
densenet_2d = models.Sequential([data_augmentation_2d, build_2d_densenet()])
densenet_2d.compile(optimizer=optimizer_2d_densenet, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
densenet_2d.fit(X_2d_train, y_train_2d, validation_split=0.1, epochs=10, batch_size=32,
                callbacks=[keras.callbacks.ModelCheckpoint(os.path.join(model_dir, "best_densenet_2d.keras"), save_best_only=True), early_stopping],
                class_weight=class_weight)

# ResNet 1D (reuses the 1D augmentation pipeline)
tf.keras.backend.clear_session()
lr_schedule_1d_resnet = keras.optimizers.schedules.CosineDecay(initial_learning_rate=0.001, decay_steps=10*len(X_1d_train)//64)
optimizer_1d_resnet = keras.optimizers.Adam(learning_rate=lr_schedule_1d_resnet)
resnet_1d = models.Sequential([data_augmentation_1d, build_1d_resnet()])
resnet_1d.compile(optimizer=optimizer_1d_resnet, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
resnet_1d.fit(X_1d_train, y_train, validation_split=0.1, epochs=10, batch_size=64,
              callbacks=[keras.callbacks.ModelCheckpoint(os.path.join(model_dir, "best_resnet_1d.keras"), save_best_only=True), early_stopping],
              class_weight=class_weight)

# ResNet 2D (reuses the 2D augmentation pipeline)
tf.keras.backend.clear_session()
lr_schedule_2d_resnet = keras.optimizers.schedules.CosineDecay(initial_learning_rate=0.001, decay_steps=10*len(X_2d_train)//32)
optimizer_2d_resnet = keras.optimizers.Adam(learning_rate=lr_schedule_2d_resnet)
resnet_2d = models.Sequential([data_augmentation_2d, build_2d_resnet()])
resnet_2d.compile(optimizer=optimizer_2d_resnet, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
resnet_2d.fit(X_2d_train, y_train_2d, validation_split=0.1, epochs=10, batch_size=32,
              callbacks=[keras.callbacks.ModelCheckpoint(os.path.join(model_dir, "best_resnet_2d.keras"), save_best_only=True), early_stopping],
              class_weight=class_weight)

# Evaluate
# Score the in-memory models (as left by fit(), not reloaded from the
# checkpoints) on the held-out test split. predict() returns one softmax
# score per class for every sample.
y_pred_1d_densenet = densenet_1d.predict(X_1d_test)
y_pred_2d_densenet = densenet_2d.predict(X_2d_test)
y_pred_1d_resnet = resnet_1d.predict(X_1d_test)
y_pred_2d_resnet = resnet_2d.predict(X_2d_test)

# Predicted class = index of the highest score.
y_pred_1d_densenet_labels = np.argmax(y_pred_1d_densenet, axis=1)
y_pred_2d_densenet_labels = np.argmax(y_pred_2d_densenet, axis=1)
y_pred_1d_resnet_labels = np.argmax(y_pred_1d_resnet, axis=1)
y_pred_2d_resnet_labels = np.argmax(y_pred_2d_resnet, axis=1)

# Accuracy: fraction of test samples whose predicted label matches the truth.
# 1D models are scored against y_test, 2D models against y_test_2d.
densenet_1d_acc = np.mean(y_pred_1d_densenet_labels == y_test)
densenet_2d_acc = np.mean(y_pred_2d_densenet_labels == y_test_2d)
resnet_1d_acc = np.mean(y_pred_1d_resnet_labels == y_test)
resnet_2d_acc = np.mean(y_pred_2d_resnet_labels == y_test_2d)

# Precision, recall and F1 are macro-averaged: each class counts equally.
densenet_1d_precision = precision_score(y_test, y_pred_1d_densenet_labels, average='macro')
densenet_2d_precision = precision_score(y_test_2d, y_pred_2d_densenet_labels, average='macro')
resnet_1d_precision = precision_score(y_test, y_pred_1d_resnet_labels, average='macro')
resnet_2d_precision = precision_score(y_test_2d, y_pred_2d_resnet_labels, average='macro')

densenet_1d_recall = recall_score(y_test, y_pred_1d_densenet_labels, average='macro')
densenet_2d_recall = recall_score(y_test_2d, y_pred_2d_densenet_labels, average='macro')
resnet_1d_recall = recall_score(y_test, y_pred_1d_resnet_labels, average='macro')
resnet_2d_recall = recall_score(y_test_2d, y_pred_2d_resnet_labels, average='macro')

densenet_1d_f1 = f1_score(y_test, y_pred_1d_densenet_labels, average='macro')
densenet_2d_f1 = f1_score(y_test_2d, y_pred_2d_densenet_labels, average='macro')
resnet_1d_f1 = f1_score(y_test, y_pred_1d_resnet_labels, average='macro')
resnet_2d_f1 = f1_score(y_test_2d, y_pred_2d_resnet_labels, average='macro')

# Summary table, four decimals per metric. Header text: "Evaluation results:"
print("\nKết quả đánh giá:")
print(f"DenseNet 1D - Accuracy: {densenet_1d_acc:.4f}, Precision: {densenet_1d_precision:.4f}, Recall: {densenet_1d_recall:.4f}, F1: {densenet_1d_f1:.4f}")
print(f"DenseNet 2D (GADF) - Accuracy: {densenet_2d_acc:.4f}, Precision: {densenet_2d_precision:.4f}, Recall: {densenet_2d_recall:.4f}, F1: {densenet_2d_f1:.4f}")
print(f"ResNet 1D - Accuracy: {resnet_1d_acc:.4f}, Precision: {resnet_1d_precision:.4f}, Recall: {resnet_1d_recall:.4f}, F1: {resnet_1d_f1:.4f}")
print(f"ResNet 2D (GADF) - Accuracy: {resnet_2d_acc:.4f}, Precision: {resnet_2d_precision:.4f}, Recall: {resnet_2d_recall:.4f}, F1: {resnet_2d_f1:.4f}")

# Save results
# Confusion-matrix images, one per model (written by plot_confusion_matrix
# into visualizations_dir).
plot_confusion_matrix(y_test, y_pred_1d_densenet_labels, "Confusion Matrix - DenseNet 1D", "cm_densenet_1d.png")
plot_confusion_matrix(y_test_2d, y_pred_2d_densenet_labels, "Confusion Matrix - DenseNet 2D (GADF)", "cm_densenet_2d_gadf.png")
plot_confusion_matrix(y_test, y_pred_1d_resnet_labels, "Confusion Matrix - ResNet 1D", "cm_resnet_1d.png")
plot_confusion_matrix(y_test_2d, y_pred_2d_resnet_labels, "Confusion Matrix - ResNet 2D (GADF)", "cm_resnet_2d_gadf.png")

# Final in-memory models, saved next to (not over) the "best_*" checkpoints
# written during training.
densenet_1d.save(os.path.join(model_dir, 'densenet_1d_full.keras'))
densenet_2d.save(os.path.join(model_dir, 'densenet_2d_full.keras'))
resnet_1d.save(os.path.join(model_dir, 'resnet_1d_full.keras'))
resnet_2d.save(os.path.join(model_dir, 'resnet_2d_full.keras'))

# Raw per-class prediction scores on the test split (before argmax).
np.save(os.path.join(results_dir, 'y_pred_1d_densenet.npy'), y_pred_1d_densenet)
np.save(os.path.join(results_dir, 'y_pred_2d_densenet.npy'), y_pred_2d_densenet)
np.save(os.path.join(results_dir, 'y_pred_1d_resnet.npy'), y_pred_1d_resnet)
np.save(os.path.join(results_dir, 'y_pred_2d_resnet.npy'), y_pred_2d_resnet)

# Scalar metrics of all four models, keyed by model name, written as JSON.
metrics = {
    'densenet_1d': {'accuracy': densenet_1d_acc, 'precision': densenet_1d_precision, 'recall': densenet_1d_recall, 'f1': densenet_1d_f1},
    'densenet_2d': {'accuracy': densenet_2d_acc, 'precision': densenet_2d_precision, 'recall': densenet_2d_recall, 'f1': densenet_2d_f1},
    'resnet_1d': {'accuracy': resnet_1d_acc, 'precision': resnet_1d_precision, 'recall': resnet_1d_recall, 'f1': resnet_1d_f1},
    'resnet_2d': {'accuracy': resnet_2d_acc, 'precision': resnet_2d_precision, 'recall': resnet_2d_recall, 'f1': resnet_2d_f1}
}
with open(os.path.join(results_dir, 'metrics.json'), 'w') as f:
    json.dump(metrics, f, indent=4)