In [None]:
import tensorflow as tf, platform, os
print("Python:", platform.python_version())
print("TensorFlow:", tf.__version__)
print("GPUs:", tf.config.list_physical_devices('GPU'))
!nvidia-smi

In [None]:
!pip -q install kaggle scikit-learn opencv-python matplotlib

In [None]:
# 1) Upload kaggle.json (Account → Create New API Token on kaggle.com)
from google.colab import files
files.upload()  # choose kaggle.json

# 2) Put it where Kaggle CLI expects
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# 3) Sanity check
!kaggle --version

In [None]:
# Download the dataset zip to /content/data
# (uses the Kaggle CLI, so the credentials cell above must have been run first)
!mkdir -p /content/data
!kaggle datasets download -d xhlulu/140k-real-and-fake-faces -p /content/data

# Unzip quietly and overwrite if it already exists
# The previous extraction is removed first so every run starts from a clean tree.
!rm -rf /content/data/real_vs_fake
!unzip -qo /content/data/140k-real-and-fake-faces.zip -d /content/data

# Show structure we’ll use
# (the generator cell below reads the train/ and valid/ folders under this path)
!ls -la /content/data/real_vs_fake/real-vs-fake

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Input resolution passed as `target_size` to the directory iterators and
# reused as the spatial part of the model's input shape.
IMG_SIZE = (224, 224)
BATCH_SIZE = 32  # if you hit OOM on GPU, lower to 16 or 8

# Train / validation image folders inside the extracted Kaggle archive.
TRAIN_DIR = "/content/data/real_vs_fake/real-vs-fake/train"
VAL_DIR   = "/content/data/real_vs_fake/real-vs-fake/valid"

def get_data_generators(train_dir=TRAIN_DIR, val_dir=VAL_DIR,
                        img_size=IMG_SIZE, batch_size=BATCH_SIZE, seed=42,
                        rescale=None):
    """Build the training and validation directory iterators.

    By default pixels are passed through unscaled, as floats in [0, 255].
    The Keras EfficientNet models (EfficientNetB0 is the backbone used in this
    notebook) include their own input rescaling and expect [0, 255] inputs, so
    dividing by 255 here would normalise twice and feed near-black images to
    the pretrained backbone.

    Args:
        train_dir: Directory with one sub-folder per class for training.
        val_dir: Directory with one sub-folder per class for validation.
        img_size: (height, width) every image is resized to.
        batch_size: Number of images per batch for both iterators.
        seed: Seed for the training iterator's shuffling.
        rescale: Optional multiplicative factor applied to every pixel.
            Leave as None for EfficientNet; pass e.g. ``1./255`` only for a
            backbone that expects inputs in [0, 1].

    Returns:
        Tuple ``(train_gen, val_gen)``. The validation iterator is not
        shuffled, so its batch order matches ``val_gen.classes``.
    """
    train_datagen = ImageDataGenerator(rescale=rescale)
    val_datagen   = ImageDataGenerator(rescale=rescale)

    train_gen = train_datagen.flow_from_directory(
        train_dir, target_size=img_size, batch_size=batch_size,
        class_mode='binary', color_mode='rgb', seed=seed
    )
    # shuffle=False keeps predictions aligned with the iterator's `.classes`.
    val_gen = val_datagen.flow_from_directory(
        val_dir, target_size=img_size, batch_size=batch_size,
        class_mode='binary', color_mode='rgb', shuffle=False
    )
    return train_gen, val_gen

# Create both iterators with the default configuration and report what was found.
train_gen, val_gen = get_data_generators()
n_train_samples, n_val_samples = train_gen.samples, val_gen.samples
print("Train samples:", n_train_samples, "| Val samples:", n_val_samples)
class_to_index = train_gen.class_indices  # class folder name -> integer label
print("Classes:", class_to_index)

In [None]:
import json, os
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import GlobalAveragePooling2D, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Training length and output locations.
EPOCHS = 3  # start small; raise to 10–15 once you confirm it runs
OUT_DIR = "/content/outputs"
MODELS_DIR = f"{OUT_DIR}/models"      # checkpoints and the class-index mapping
RESULTS_DIR = f"{OUT_DIR}/results"    # curves, reports, confusion matrix

# Create both output folders up front; harmless if they already exist.
for _output_dir in (MODELS_DIR, RESULTS_DIR):
    os.makedirs(_output_dir, exist_ok=True)

def build_model(input_shape=(224, 224, 3)):
    """Build and compile a binary classifier on top of EfficientNetB0.

    The backbone is created without its classification top and followed by
    global average pooling, dropout and a single sigmoid unit. ImageNet
    weights are used when they can be loaded; otherwise the backbone falls
    back to random initialisation.

    The backbone is frozen only when pretrained weights were loaded. Freezing
    a randomly initialised backbone would leave the Dense head training on
    fixed random features, so in the fallback case the whole network is left
    trainable.

    Note: Keras EfficientNet models contain their own input rescaling and
    expect pixel values in the [0, 255] range.

    Args:
        input_shape: (height, width, channels) of the input images.

    Returns:
        A compiled Keras ``Model`` (Adam at 1e-4, binary cross-entropy loss,
        accuracy metric).
    """
    try:
        print("Trying EfficientNetB0 with ImageNet weights…")
        base = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)
        pretrained = True
    except Exception as e:
        # Best-effort fallback (e.g. the weights could not be downloaded).
        print("⚠️ Could not load ImageNet weights, falling back to random init.\n", e)
        base = EfficientNetB0(weights=None, include_top=False, input_shape=input_shape)
        pretrained = False

    # Transfer learning: freeze pretrained features; train everything otherwise.
    base.trainable = not pretrained

    x = GlobalAveragePooling2D()(base.output)
    x = Dropout(0.4)(x)
    out = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=base.input, outputs=out)
    model.compile(optimizer=Adam(1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Instantiate the classifier for RGB inputs at the configured resolution.
img_height, img_width = IMG_SIZE[0], IMG_SIZE[1]
model = build_model((img_height, img_width, 3))

# Keep the checkpoint with the best validation accuracy.
checkpoint_cb = ModelCheckpoint(
    f"{MODELS_DIR}/deepfake_efficientnetb0.h5",
    monitor='val_accuracy', mode='max', save_best_only=True,
)
# Stop once validation accuracy has not improved for 3 epochs, restoring the best weights.
early_stop_cb = EarlyStopping(
    monitor='val_accuracy', mode='max', patience=3, restore_best_weights=True,
)
# Halve the learning rate after 2 epochs without validation-loss improvement.
reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)

callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb]

In [None]:
import matplotlib.pyplot as plt

# Train with the callbacks configured above; `history` records per-epoch metrics.
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=EPOCHS,
    callbacks=callbacks,
    verbose=1,
)

# Save class indices mapping
class_indices_path = f"{MODELS_DIR}/class_indices.json"
with open(class_indices_path, "w") as f:
    json.dump(train_gen.class_indices, f, indent=2)

# Plot & save training curves
# One row per figure:
# (train key, val key, train label, val label, title, y-axis label, output file)
curve_specs = [
    ('accuracy', 'val_accuracy', 'train_acc', 'val_acc', 'Accuracy', 'acc', "acc_curve.png"),
    ('loss', 'val_loss', 'train_loss', 'val_loss', 'Loss', 'loss', "loss_curve.png"),
]
for train_key, val_key, train_label, val_label, curve_title, y_label, file_name in curve_specs:
    plt.figure()
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(curve_title)
    plt.xlabel('epoch')
    plt.ylabel(y_label)
    plt.legend()
    plt.savefig(f"{RESULTS_DIR}/{file_name}")
    plt.close()

print("Saved model to:", f"{MODELS_DIR}/deepfake_efficientnetb0.h5")
print("Saved curves to:", RESULTS_DIR)

In [None]:
import numpy as np, matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import load_model

model_path = f"{MODELS_DIR}/deepfake_efficientnetb0.h5"
if not os.path.exists(model_path):
    raise FileNotFoundError("Model not found. Run the training cell first.")

# Evaluate the checkpoint saved on disk rather than the weights left in memory.
model = load_model(model_path)

# fresh val generator (no shuffle), so prediction order matches `.classes`
_, val_gen_eval = get_data_generators()

# Sigmoid outputs are thresholded at 0.5 to get hard 0/1 predictions.
preds = model.predict(val_gen_eval, verbose=1).ravel()
y_pred = (preds >= 0.5).astype(int)
y_true = val_gen_eval.classes

# Order class names by their integer index so names[i] describes label i.
class_to_index = val_gen_eval.class_indices
labels = sorted(class_to_index, key=class_to_index.get)

# classification report
report = classification_report(y_true, y_pred, target_names=labels, zero_division=0)
print(report)
with open(f"{RESULTS_DIR}/classification_report.txt", "w") as f:
    f.write(report)

# confusion matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(y_true, y_pred)
plt.figure()
plt.imshow(cm, cmap='Blues')
plt.title("Confusion Matrix"); plt.colorbar()
plt.xticks(range(len(labels)), labels, rotation=45)
plt.yticks(range(len(labels)), labels)
# Axis labels and per-cell counts make the saved figure readable on its own.
plt.xlabel("Predicted label")
plt.ylabel("True label")
text_threshold = cm.max() / 2
for row_idx in range(cm.shape[0]):
    for col_idx in range(cm.shape[1]):
        count = cm[row_idx, col_idx]
        plt.text(col_idx, row_idx, str(count), ha='center', va='center',
                 color='white' if count > text_threshold else 'black')
plt.tight_layout()
plt.savefig(f"{RESULTS_DIR}/confusion_matrix.png")
plt.close()

print("Saved evaluation to:", RESULTS_DIR)

In [None]:
# Zip outputs and download to your machine
# -r recurses into /content/outputs (models + results); -q keeps zip quiet.
!zip -rq /content/outputs.zip /content/outputs
from google.colab import files
# Hand the archive to the browser as a file download.
files.download('/content/outputs.zip')