In [1]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv2D, BatchNormalization, MaxPooling2D,
    Flatten, Dense, Dropout, Activation
)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import Sequence # <-- 1. 커스텀 제너레이터를 위해 추가
import h5py
import os
import numpy as np
from sklearn.model_selection import train_test_split # <-- 인덱스 분할을 위해 사용
import numpy as np
import cv2
import glob
from tqdm import tqdm

In [3]:
def build_alexnet_like(input_shape=(224, 224, 3)):
    """
    AlexNet 구조를 모방한 Keras Sequential 모델을 생성합니다.
    (Conv 5개, FC 3개)
    """

    model = Sequential(name="AlexNet_Like_Sequential")

    # --- Conv 1 ---
    # 첫 번째 레이어에 input_shape를 지정합니다.
    model.add(Conv2D(96, (11, 11), strides=(4, 4), padding='same', input_shape=input_shape))
    # model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

    # --- Conv 2 ---
    model.add(Conv2D(256, (5, 5), padding='same'))
    # model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

    # --- Conv 3 ---
    model.add(Conv2D(384, (3, 3), padding='same'))
    # model.add(BatchNormalization())
    model.add(Activation('relu'))

    # --- Conv 4 ---
    model.add(Conv2D(384, (3, 3), padding='same'))
    # model.add(BatchNormalization())
    model.add(Activation('relu'))

    # --- Conv 5 ---
    model.add(Conv2D(256, (3, 3), padding='same'))
    # model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))

    # --- FC Layers (Flatten) ---
    model.add(Flatten())

    # --- FC 1 ---
    model.add(Dense(1024))
    model.add(Activation('relu'))
    # model.add(Dropout(0.5))

    # --- FC 2 ---
    model.add(Dense(1024))
    model.add(Activation('relu'))
    # model.add(Dropout(0.5))

    # --- FC 3 (Output Layer) ---
    # 이진 분류 (Real: 0, Fake: 1)를 위해 1개의 뉴런과 sigmoid 사용
    model.add(Dense(1, activation='sigmoid'))

    return model

In [4]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [5]:
import zipfile
zip_file_name = '/content/drive/MyDrive/기학기/real_fake_dataset.zip'
extraction_dir = '/content/dataset'

with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
    zip_ref.extractall(extraction_dir)



In [6]:
def load_and_preprocess(image_path, label):
    # 1. 이미지 읽기
    image = tf.io.read_file(image_path)
    # JPG, PNG 등 다양한 포맷 디코딩
    image = tf.image.decode_image(image, channels=3, expand_animations=False)
    # 2. 크기가 제각각인 이미지를 비율 유지 + 패딩하여 224x224로 만듦
    # (tf.image.resize는 기본적으로 비율을 찌그러뜨리므로, resize_with_pad 사용)
    image = tf.image.resize_with_pad(image, IMG_SIZE, IMG_SIZE)
    # 3. 0-1 정규화
    image = tf.cast(image, tf.float32) / 255.0
    # 4. 데이터 증강 (학습 시에만 적용하기 위해 튜플로 반환)
    # (데이터 증강은 create_dataset 함수에서 적용)
    return image, label

def create_dataset(paths, labels, is_training=True):
    """
    이미지 경로 리스트로부터 tf.data.Dataset 파이프라인을 생성합니다.
    """
    # 1. 경로 리스트로부터 데이터셋 생성
    dataset = tf.data.Dataset.from_tensor_slices((paths, labels))
    # 2. 실시간 로드 및 전처리 (병렬 처리)
    dataset = dataset.map(load_and_preprocess, num_parallel_calls=AUTOTUNE)
    if is_training:
        # 3. 학습용 데이터셋: 셔플 및 데이터 증강
        dataset = dataset.shuffle(buffer_size=len(paths))
        # 간단한 좌우 반전 증강
        # dataset = dataset.map(lambda x, y: (tf.image.random_flip_left_right(x), y),
        #                       num_parallel_calls=AUTOTUNE)
    # 4. 배치 나누기
    dataset = dataset.batch(BATCH_SIZE)
    # 5. Prefetch: GPU가 연산하는 동안 CPU가 다음 배치를 준비
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)

    return dataset


In [7]:
# --- 설정 ---
MODEL_SAVE_PATH = '/drive/MyDrive/기학기/best_alexnet_model.h5'
face_real_dir = '/content/dataset/real_fake_dataset/face_real'
face_fake_dir = '/content/dataset/real_fake_dataset/face_fake'
real_paths = glob.glob(os.path.join(face_real_dir, "*.*"))
fake_paths = glob.glob(os.path.join(face_fake_dir, "*.*"))

IMG_SIZE = 224
EPOCHS = 20
BATCH_SIZE = 64
NUM_SAMPLES_TO_USE = 1000 # 사용할 총 샘플 수
AUTOTUNE = tf.data.AUTOTUNE


In [8]:
all_paths = real_paths + fake_paths
labels = [0] * len(real_paths) + [1] * len(fake_paths)

# --- 데이터 개수 샘플링 ---
_, target_paths, _, target_labels = train_test_split(
        all_paths, labels,
        test_size=NUM_SAMPLES_TO_USE,
        random_state=42,
        stratify=labels
    )

In [9]:
# --- 7 : 3으로 분할 ---
train_paths, temp_paths, train_labels, temp_labels = train_test_split(
        target_paths, target_labels,
        test_size=0.3,
        random_state=42,
        stratify=target_labels
    )

In [10]:
# --- 2 : 1로 분할 ---
val_paths, test_paths, val_labels, test_labels = train_test_split(
        temp_paths, temp_labels,
        test_size=(1/3),
        random_state=42,
        stratify=temp_labels
    )

In [11]:
train_ds = create_dataset(train_paths, train_labels, is_training=True)
val_ds = create_dataset(val_paths, val_labels, is_training=False)
test_ds = create_dataset(test_paths, test_labels, is_training=False)

In [12]:
# --- 모델 생성 및 컴파일 ---
print("모델 생성 중...")
model = build_alexnet_like(input_shape=(224, 224, 3))
# model.summary()
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

checkpoint = ModelCheckpoint(
        MODEL_SAVE_PATH,
        monitor='val_accuracy', # 검증 정확도 기준
        save_best_only=True,
        mode='max',
        verbose=1
    )
early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=5, # 5 에포크 동안 검증 손실이 개선되지 않으면 중단
        restore_best_weights=True,
        verbose=1
    )

모델 생성 중...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
# --- 모델 학습 ---
history = model.fit(
        train_ds,
        epochs=EPOCHS,
        validation_data=val_ds,
        callbacks=[checkpoint, early_stopping]
        # workers, use_multiprocessing는 tf.data.AUTOTUNE이 자동 관리
    )
print("학습 완료!")

Epoch 1/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 946ms/step - accuracy: 0.5095 - loss: 0.6938
Epoch 1: val_accuracy improved from -inf to 0.50500, saving model to /drive/MyDrive/기학기/best_alexnet_model.h5




[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step - accuracy: 0.5092 - loss: 0.6938 - val_accuracy: 0.5050 - val_loss: 0.6916
Epoch 2/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - accuracy: 0.5086 - loss: 0.6915
Epoch 2: val_accuracy improved from 0.50500 to 0.51500, saving model to /drive/MyDrive/기학기/best_alexnet_model.h5




[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 144ms/step - accuracy: 0.5080 - loss: 0.6916 - val_accuracy: 0.5150 - val_loss: 0.6928
Epoch 3/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - accuracy: 0.5643 - loss: 0.6875
Epoch 3: val_accuracy improved from 0.51500 to 0.54000, saving model to /drive/MyDrive/기학기/best_alexnet_model.h5




[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 137ms/step - accuracy: 0.5646 - loss: 0.6874 - val_accuracy: 0.5400 - val_loss: 0.6855
Epoch 4/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - accuracy: 0.5516 - loss: 0.6830
Epoch 4: val_accuracy improved from 0.54000 to 0.57000, saving model to /drive/MyDrive/기학기/best_alexnet_model.h5




[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 487ms/step - accuracy: 0.5508 - loss: 0.6831 - val_accuracy: 0.5700 - val_loss: 0.6816
Epoch 5/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step - accuracy: 0.5972 - loss: 0.6726
Epoch 5: val_accuracy did not improve from 0.57000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 112ms/step - accuracy: 0.5956 - loss: 0.6729 - val_accuracy: 0.5550 - val_loss: 0.6766
Epoch 6/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.5728 - loss: 0.6640
Epoch 6: val_accuracy improved from 0.57000 to 0.58000, saving model to /drive/MyDrive/기학기/best_alexnet_model.h5




[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 153ms/step - accuracy: 0.5733 - loss: 0.6637 - val_accuracy: 0.5800 - val_loss: 0.6691
Epoch 7/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step - accuracy: 0.6266 - loss: 0.6455
Epoch 7: val_accuracy did not improve from 0.58000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 101ms/step - accuracy: 0.6254 - loss: 0.6463 - val_accuracy: 0.5800 - val_loss: 0.6769
Epoch 8/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step - accuracy: 0.6406 - loss: 0.6524
Epoch 8: val_accuracy did not improve from 0.58000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 103ms/step - accuracy: 0.6380 - loss: 0.6535 - val_accuracy: 0.5300 - val_loss: 0.6764
Epoch 9/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - accuracy: 0.6198 - loss: 0.6531
Epoch 9: val_a



[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 158ms/step - accuracy: 0.6200 - loss: 0.6528 - val_accuracy: 0.6100 - val_loss: 0.6640
Epoch 10/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 0.6182 - loss: 0.6463
Epoch 10: val_accuracy did not improve from 0.61000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 122ms/step - accuracy: 0.6195 - loss: 0.6457 - val_accuracy: 0.5900 - val_loss: 0.6675
Epoch 11/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step - accuracy: 0.6262 - loss: 0.6201
Epoch 11: val_accuracy did not improve from 0.61000
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 104ms/step - accuracy: 0.6270 - loss: 0.6200 - val_accuracy: 0.5850 - val_loss: 0.6710
Epoch 12/20
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step - accuracy: 0.6335 - loss: 0.6214
Epoch 12:

In [14]:
# --- 모델 평가 ---
try:
    best_model = load_model(MODEL_SAVE_PATH)
    print("저장된 모델 로드 성공.")

    test_loss, test_accuracy = best_model.evaluate(test_ds)

    print(f"\n===== 최종 테스트 결과 =====")
    print(f"  Test Loss: {test_loss:.4f}")
    print(f"  Test Accuracy: {test_accuracy * 100:.2f}%")

except Exception as e:
    print(f"모델 로드 또는 평가 중 오류 발생: {e}")
    print("학습이 정상적으로 완료되었는지 확인하세요.")

모델 로드 또는 평가 중 오류 발생: name 'load_model' is not defined
학습이 정상적으로 완료되었는지 확인하세요.
