In [1]:
import zipfile
import os

# Replace 'cifake-dataset.zip' with your actual file name
with zipfile.ZipFile('archive.zip', 'r') as zip_ref:
    zip_ref.extractall('cifake_dataset')

# List the contents to verify
os.listdir('cifake_dataset')


['test', 'train']

In [2]:
import tensorflow as tf
import os
import random

# === CONFIG ===
base_dir = '/content/cifake_dataset'
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32

# Paths
train_fake_dir = os.path.join(base_dir, 'train', 'FAKE')
train_real_dir = os.path.join(base_dir, 'train', 'REAL')

# Get all image paths
fake_images = [os.path.join(train_fake_dir, fname) for fname in os.listdir(train_fake_dir)][:600]
real_images = [os.path.join(train_real_dir, fname) for fname in os.listdir(train_real_dir)][:600]

# Combine paths and labels
all_images = fake_images + real_images
labels = [1] * 600 + [0] * 600  # 1 = FAKE, 0 = REAL

# Shuffle
combined = list(zip(all_images, labels))
random.shuffle(combined)
all_images, labels = zip(*combined)

# === DATA AUGMENTATION FUNCTION ===
def load_and_augment(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, IMAGE_SIZE)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.1)
    image = tf.image.random_contrast(image, 0.8, 1.2)
    image = image / 255.0  # normalize
    return image, label

# === BUILD DATASET ===
path_ds = tf.data.Dataset.from_tensor_slices((list(all_images), list(labels)))
dataset = path_ds.map(load_and_augment, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.shuffle(1200).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)


In [3]:
fake_images = fake_images[:600]
real_images = real_images[:600]
labels = [1]*600 + [0]*600


In [4]:
from sklearn.model_selection import train_test_split

train_paths, test_paths, train_labels, test_labels = train_test_split(
    all_images, labels, test_size=0.2, stratify=labels, random_state=42
)


In [5]:
def load_and_preprocess(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_png(image, channels=3)
    image = tf.image.resize(image, [224, 224])
    image = image / 255.0
    return image, label

# Wrap training data
train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_ds = train_ds.map(load_and_preprocess).shuffle(80).batch(16)

# Wrap testing data
test_ds = tf.data.Dataset.from_tensor_slices((test_paths, test_labels))
test_ds = test_ds.map(load_and_preprocess).batch(16)

In [6]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

# === CONFIGURATION ===
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 50

# === LOAD FILE PATHS ===
base_dir = 'cifake_dataset/train'  # <-- adjust if your folder is different
fake_images = [os.path.join(base_dir, 'FAKE', fname) for fname in os.listdir(os.path.join(base_dir, 'FAKE'))]
real_images = [os.path.join(base_dir, 'REAL', fname) for fname in os.listdir(os.path.join(base_dir, 'REAL'))]

# === LIMIT TO 12,000 IMAGES (6000 each) ===
fake_images = fake_images[:600]
real_images = real_images[:600]
all_images = fake_images + real_images
labels = [0]*600 + [1]*600  # 0 = fake, 1 = real

# === SPLIT DATA ===
train_paths, test_paths, train_labels, test_labels = train_test_split(
    all_images, labels, test_size=0.2, stratify=labels, random_state=42
)

# === PREPROCESSING FUNCTION ===
def load_and_preprocess(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, IMAGE_SIZE)
    image = tf.cast(image, tf.float32) / 255.0
    return image, label

# === DATASET PIPELINE ===
train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_ds = train_ds.map(load_and_preprocess).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

test_ds = tf.data.Dataset.from_tensor_slices((test_paths, test_labels))
test_ds = test_ds.map(load_and_preprocess).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# === CNN MODEL ===
cnn_model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Binary classification
])

cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# === EARLY STOPPING ===
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# === TRAIN THE MODEL ===
history = cnn_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=EPOCHS,
    callbacks=[early_stop]
)


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 4s/step - accuracy: 0.4968 - loss: 0.9409 - val_accuracy: 0.6333 - val_loss: 0.6539
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 5s/step - accuracy: 0.6138 - loss: 0.6523 - val_accuracy: 0.7333 - val_loss: 0.5670
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 4s/step - accuracy: 0.6814 - loss: 0.5784 - val_accuracy: 0.7125 - val_loss: 0.5535
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 4s/step - accuracy: 0.7277 - loss: 0.5293 - val_accuracy: 0.7542 - val_loss: 0.5404
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 4s/step - accuracy: 0.7266 - loss: 0.5299 - val_accuracy: 0.7292 - val_loss: 0.5406
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 4s/step - accuracy: 0.7814 - loss: 0.4524 - val_accuracy: 0.7333 - val_loss: 0.5200
Epoch 7/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━

In [3]:
pip install transformers




In [8]:
import os
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

# === CONFIGURATION ===
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 50

# === LOAD FILE PATHS ===
base_dir = 'cifake_dataset/train'
fake_images = [os.path.join(base_dir, 'FAKE', fname) for fname in os.listdir(os.path.join(base_dir, 'FAKE'))]
real_images = [os.path.join(base_dir, 'REAL', fname) for fname in os.listdir(os.path.join(base_dir, 'REAL'))]

# === LIMIT TO 1200 IMAGES (600 each) ===
fake_images = fake_images[:600]
real_images = real_images[:600]
all_images = fake_images + real_images
labels = [0]*600 + [1]*600  # 0 = fake, 1 = real

# === SPLIT DATA ===
train_paths, test_paths, train_labels, test_labels = train_test_split(
    all_images, labels, test_size=0.2, stratify=labels, random_state=42
)

# === VGG Preprocessing ===
def load_and_preprocess(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, IMAGE_SIZE)
    image = tf.keras.applications.vgg16.preprocess_input(image)
    return image, label

# === DATASET PIPELINE ===
train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_ds = train_ds.map(load_and_preprocess).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

test_ds = tf.data.Dataset.from_tensor_slices((test_paths, test_labels))
test_ds = test_ds.map(load_and_preprocess).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# === LOAD BASE VGG16 ===
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # freeze VGG layers

# === BUILD MODEL ===
vgg_model = models.Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

vgg_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# === EARLY STOPPING ===
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# === TRAIN THE MODEL ===
history = vgg_model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=EPOCHS,
    callbacks=[early_stop]
)


Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m731s[0m 24s/step - accuracy: 0.6911 - loss: 6.0148 - val_accuracy: 0.8083 - val_loss: 3.4520
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m713s[0m 24s/step - accuracy: 0.8961 - loss: 1.5443 - val_accuracy: 0.8625 - val_loss: 1.3819
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m711s[0m 24s/step - accuracy: 0.9415 - loss: 0.4088 - val_accuracy: 0.8417 - val_loss: 1.3253
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m740s[0m 24s/step - accuracy: 0.9747 - loss: 0.1039 - val_accuracy: 0.8750 - val_loss: 1.0850
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m802s[0m 26s/step - accuracy: 0.9787 - loss: 0.0799 - val_accuracy: 0.8708 - val_loss: 0.6611
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m709s[0m 24s/step - accuracy: 0.9865 - loss: 0.0479 - val_accuracy: 0.8667 - val_loss: 0.6673
Epoch 7/50
[1m30/30[0m [3

In [10]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from transformers import ViTFeatureExtractor
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.data import Dataset

# === CONFIGURATION ===
IMAGE_SIZE = (224, 224)
DATA_PATH = 'cifake_dataset/train'
NUM_IMAGES_PER_CLASS = 600
BATCH_SIZE = 32

# === LOAD IMAGE PATHS ===
fake_paths = [os.path.join(DATA_PATH, 'FAKE', fname) for fname in os.listdir(os.path.join(DATA_PATH, 'FAKE'))][:NUM_IMAGES_PER_CLASS]
real_paths = [os.path.join(DATA_PATH, 'REAL', fname) for fname in os.listdir(os.path.join(DATA_PATH, 'REAL'))][:NUM_IMAGES_PER_CLASS]

all_paths = fake_paths + real_paths
all_labels = [0]*NUM_IMAGES_PER_CLASS + [1]*NUM_IMAGES_PER_CLASS  # 0 = FAKE, 1 = REAL

# === SPLIT DATA ===
train_paths, test_paths, train_labels, test_labels = train_test_split(
    all_paths, all_labels, test_size=0.2, stratify=all_labels, random_state=42
)

# === LOAD ViT FEATURE EXTRACTOR ===
feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224-in21k")

# === FEATURE EXTRACTION ===
def extract_vit_features(image_paths, labels):
    features = []
    for path in image_paths:
        img = Image.open(path).convert("RGB").resize(IMAGE_SIZE)
        inputs = feature_extractor(images=img, return_tensors="np")
        features.append(inputs["pixel_values"][0])  # shape: (3, 224, 224)
    return np.array(features), np.array(labels)

X_train, y_train = extract_vit_features(train_paths, train_labels)
X_test, y_test = extract_vit_features(test_paths, test_labels)

print(f"Train shape: {X_train.shape}, Labels: {y_train.shape}")
print(f"Test shape:  {X_test.shape}, Labels: {y_test.shape}")

# === CLASSIFIER MODEL ===
vit_classifier = Sequential([
    Flatten(input_shape=(3, 224, 224)),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification
])

vit_classifier.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# === EARLY STOPPING CALLBACK ===
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# === TRAIN ===
vit_classifier.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    batch_size=BATCH_SIZE,
    epochs=50,
    callbacks=[early_stop]
)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Train shape: (960, 3, 224, 224), Labels: (960,)
Test shape:  (240, 3, 224, 224), Labels: (240,)


  super().__init__(**kwargs)


Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 644ms/step - accuracy: 0.5656 - loss: 14.7816 - val_accuracy: 0.6375 - val_loss: 14.3590
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 400ms/step - accuracy: 0.6408 - loss: 13.4218 - val_accuracy: 0.6708 - val_loss: 14.9627
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 403ms/step - accuracy: 0.7314 - loss: 8.4270 - val_accuracy: 0.6292 - val_loss: 11.1492
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 395ms/step - accuracy: 0.7205 - loss: 7.3934 - val_accuracy: 0.6833 - val_loss: 12.0369
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 399ms/step - accuracy: 0.7351 - loss: 5.4699 - val_accuracy: 0.6542 - val_loss: 11.9282
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 396ms/step - accuracy: 0.7321 - loss: 4.5511 - val_accuracy: 0.6792 - val_loss: 12.7364


<keras.src.callbacks.history.History at 0x7fb213555310>

In [7]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# === Predict CNN ===
y_pred_cnn = cnn_model.predict(test_ds)
y_pred_cnn = (y_pred_cnn > 0.5).astype(int).flatten()

# === True Labels ===
y_true = np.concatenate([y for x, y in test_ds], axis=0)

# === Scores ===
print("===== CNN Test Results =====")
print(f"Accuracy : {accuracy_score(y_true, y_pred_cnn):.4f}")
print(f"Precision: {precision_score(y_true, y_pred_cnn):.4f}")
print(f"Recall   : {recall_score(y_true, y_pred_cnn):.4f}")
print(f"F1 Score : {f1_score(y_true, y_pred_cnn):.4f}")

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 835ms/step
===== CNN Test Results =====
Accuracy : 0.7333
Precision: 0.7258
Recall   : 0.7500
F1 Score : 0.7377


In [11]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Predict and threshold
y_pred_vit = (vit_classifier.predict(X_test) > 0.5).astype(int).flatten()

# Evaluation
print("===== ViT Test Results =====")
print(f"Accuracy : {accuracy_score(y_test, y_pred_vit):.4f}")
print(f"Precision: {precision_score(y_test, y_pred_vit):.4f}")
print(f"Recall   : {recall_score(y_test, y_pred_vit):.4f}")
print(f"F1 Score : {f1_score(y_test, y_pred_vit):.4f}")




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
===== ViT Test Results =====
Accuracy : 0.6292
Precision: 0.6303
Recall   : 0.6250
F1 Score : 0.6276


In [9]:
# === Predict VGG ===
y_pred_vgg = vgg_model.predict(test_ds)
y_pred_vgg = (y_pred_vgg > 0.5).astype(int).flatten()

# === Scores ===
print("===== VGG Test Results =====")
print(f"Accuracy : {accuracy_score(y_true, y_pred_vgg):.4f}")
print(f"Precision: {precision_score(y_true, y_pred_vgg):.4f}")
print(f"Recall   : {recall_score(y_true, y_pred_vgg):.4f}")
print(f"F1 Score : {f1_score(y_true, y_pred_vgg):.4f}")


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 18s/step
===== VGG Test Results =====
Accuracy : 0.8708
Precision: 0.8803
Recall   : 0.8583
F1 Score : 0.8692


In [12]:
vgg_model.save("vgg16_best_model.h5")



In [13]:
from tensorflow.keras.models import load_model
vgg_model = load_model("vgg16_best_model.h5")




In [14]:
pip install gradio




In [15]:
import gradio as gr
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.vgg16 import preprocess_input
import numpy as np
from PIL import Image

# Load your trained model
model = load_model("vgg16_best_model.h5")

# Define prediction function
def classify_image(img):
    img = img.resize((224, 224))  # Resize to VGG16 input size
    img_array = np.array(img)
    img_array = preprocess_input(img_array)
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

    prediction = model.predict(img_array)[0][0]
    label = "FAKE" if prediction < 0.5 else "REAL"
    confidence = 1 - prediction if prediction < 0.5 else prediction
    return f"{label} ({confidence:.2%} confidence)"

# Launch Gradio interface
gr.Interface(
    fn=classify_image,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="VGG16 Image Classifier: Real or AI Generated?",
    description="Upload an image and the model will tell you if it's REAL or FAKE (AI-generated)"
).launch()




It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://46e25ff50291b90981.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


