## EfficientNet

In [31]:
#!c1.4
import os
import zipfile

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.callbacks import EarlyStopping
from keras.layers import (
    BatchNormalization,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    GlobalAveragePooling2D,
    MaxPool2D,
)
from keras.models import Model, Sequential
from keras.preprocessing import image
from keras.utils import to_categorical
from PIL import Image, ImageChops, ImageEnhance
from scipy import stats
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import Precision, Recall, AUC

from data_prepare.dataset_tools import extract_zip_with_cleanup, prepare_and_save_data, create_data_generators
from data_prepare.f1score import F1Score

import pandas as pd
import matplotlib.pyplot as plt

from data_prepare.dataset_tools import extract_zip_with_cleanup, prepare_and_save_data, create_data_generators
from data_prepare.plots import plot_history, confusion_matrix_plot, roc_plot, precision_recall_plot

### Подготовка результатов

In [32]:
image_archive_path = "data/celeb/v1/"
fake_images_path, real_images_path = extract_zip_with_cleanup(image_archive_path)

In [33]:
train_dir, val_dir, test_dir = prepare_and_save_data(real_images_path, fake_images_path, output_dir="data/dataset/net", target_size=(224,224))

In [34]:
train_gen, val_gen, test_gen = create_data_generators(train_dir, val_dir, test_dir, target_size=(224,224))

Found 9075 images belonging to 2 classes.
Found 1944 images belonging to 2 classes.
Found 1946 images belonging to 2 classes.

Class indices: {'fake': 0, 'real': 1}
Train samples: 9075
Val samples: 1944
Test samples: 1946


### Обучение модели

In [10]:
def build_efficientnet_model(input_shape=(224, 224, 3)):
    base_model = EfficientNetB0(
        weights='imagenet', 
        include_top=False, 
        input_shape=input_shape
    )
    base_model.trainable = False
    
    model = Sequential(
        [
            base_model,
            GlobalAveragePooling2D(),
            Dropout(0.5),
            Dense(1, activation="sigmoid"),
        ]
    )
    
    model.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss='binary_crossentropy',
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name='accuracy'),  # Явное указание
            tf.keras.metrics.Precision(name="precision"),
            tf.keras.metrics.Recall(name="recall"),
            tf.keras.metrics.AUC(name="auc")
        ]
    )
    
    return model, base_model

In [None]:
def train_model(train_generator, val_generator):
    """
    Обучает модель на данных, загруженных в память

    Parameters:
        X_train (np.array): Тренировочные изображения
        X_test (np.array): Тестовые изображения
        y_train (np.array): Тренировочные метки
        y_test (np.array): Тестовые метки

    Returns:
        tuple: (model, history) - обученная модель и история обучения
    """
    model = build_efficientnet_model()

    history = model.fit(
        train_generator,
        batch_size=32,
        epochs=10,
        validation_data=val_generator,
        callbacks=[
            ModelCheckpoint("best_model.h5", monitor="val_pr_auc", save_best_only=True),
            EarlyStopping(monitor="val_loss", patience=3),
        ],
    )

    return model, history

In [16]:
trained_model, histories = train_model(train_gen, val_gen)

2025-04-30 05:12:14.760649: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 1/10


2025-04-30 05:12:20.037376: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_3/efficientnetb0/block2b_drop/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer




2025-04-30 05:12:49.774575: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


In [17]:
trained_model.save("efficient_deepfake_model.h5")

TypeError: Unable to serialize [2.0896919 2.1128857 2.1081853] to JSON. Unrecognized type <class 'tensorflow.python.framework.ops.EagerTensor'>.

In [None]:
trained_model = tf.keras.models.load_model("efficient_deepfake_model.h5")

In [9]:
#!gt4.1
test_loss, test_precision, test_recall, test_auc = trained_model.evaluate(test_gen)
print(f"\nTest Accuracy: {test_auc:.4f}")

2025-04-29 21:15:58.167842: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




ValueError: too many values to unpack (expected 4)

### Обучение с разморозкй слоев(finetume model)

In [35]:
def train_model_finetune(train_generator, val_generator, fine_tune=False, initial_epochs=10, fine_tune_epochs=10):
    """
    Двухэтапное обучение с последующей разморозкой слоев базовой модели.
    
    Parameters:
        train_generator: генератор тренировочных данных
        val_generator: генератор валидационных данных
        fine_tune: выполнять ли дообучение
        initial_epochs: количество эпох начального обучения
        fine_tune_epochs: количество эпох дообучения
    """
    model, base_model = build_efficientnet_model()
    
    history = model.fit(
        train_generator,
        epochs=initial_epochs,
        validation_data=val_generator,
        callbacks=[
            ModelCheckpoint("initial_model.h5", monitor="val_auc", save_best_only=True, mode="max"),
            EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True)
        ]
    )
    
    if not fine_tune:
        return model, history
    
    base_model.trainable = True
    
    for layer in base_model.layers[:int(len(base_model.layers)*0.7)]:
        layer.trainable = False
    
    model.compile(
        optimizer=Adam(learning_rate=1e-5),
        loss="binary_crossentropy",
        metrics=[
            tf.keras.metrics.Precision(name="precision"),
            tf.keras.metrics.Recall(name="recall"),
            tf.keras.metrics.AUC(name="auc"),
        ]
    )
    
    fine_tune_history = model.fit(
        train_generator,
        epochs=initial_epochs + fine_tune_epochs,
        initial_epoch=history.epoch[-1] + 1,
        validation_data=val_generator,
        callbacks=[
            ModelCheckpoint("fine_tuned_model.h5", monitor="val_auc", save_best_only=True, mode="max"),
            EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
        ]
    )
    
    # Объединяем истории обучения
    full_history = {
        k: history.history[k] + fine_tune_history.history[k]
        for k in history.history
    }
    
    return model, full_history

In [36]:
finetune_trained_model, finetune_histories = train_model_finetune(train_gen, val_gen, fine_tune=True, initial_epochs=5, fine_tune_epochs=10)

Epoch 1/5


  self._warn_if_super_not_called()


[1m 14/284[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m43s[0m 161ms/step - accuracy: 0.7228 - auc: 0.4696 - loss: 0.5773 - precision: 0.0719 - recall: 0.0233       

KeyboardInterrupt: 

In [29]:
finetune_history_df = pd.DataFrame(finetune_histories.history)
history_df.to_csv('results/net/finetune_training_history.csv', index=False)

NameError: name 'pd' is not defined

In [None]:
efficientnet_model_dir = os.path.join(models_dir, 'efficientnet_finetune_deepfake_model.h5')
finetune_trained_model.save(efficientnet_model_dir)

In [None]:
y_pred = finetune_trained_model.predict(test_gen)
y_pred_df = pd.DataFrame(y_pred)
y_pred_df.to_csv('results/net/y_pred.csv', index=False)

### Analyze results for finetuned model

In [None]:
y_pred = pd.read_csv('results/x/y_pred.csv')
y_true = test_gen.labels

In [None]:
finetune_loaded_history_df = pd.read_csv('results/finetune_training_history.csv')
finetune_loaded_history = {'history': finetune_loaded_history_df.to_dict()}