<a href="https://colab.research.google.com/github/Donald724276/AI-CW/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Data Setup
import os

!pip install -q --upgrade kaggle
os.environ['KAGGLE_API_TOKEN'] = 'KGAT_fcee887368797d3cfb1d56d035c130fe'
!kaggle datasets download -d xhlulu/140k-real-and-fake-faces
!unzip -q 140k-real-and-fake-faces.zip -d dataset

In [None]:
# 2. Data Loader and Preprocessing

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import preprocess_input

# Input settings shared by every data generator in this notebook
img_size = (224, 224)   # height/width each image is resized to before ResNet50
batch_size = 64

# Random transformations applied to training images only. Each epoch sees
# slightly different variants of a photo, which discourages memorising
# individual pictures.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    **augmentation_options,
)

# Root folder that holds the train / valid / test splits
base_dir = 'dataset/real_vs_fake/real-vs-fake'

# Training batches; labels are a single 0/1 value per image (class_mode='binary')
train_dir = base_dir + '/train'
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary',
)

# Loads Validation Data
# Validation has to score the model on unmodified images, so it gets its own
# generator that applies only the ResNet50 preprocessing. Reusing train_datagen
# here would randomly rotate/shift/flip the validation images and make the
# validation metrics noisy and not comparable between epochs.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

val_generator = val_datagen.flow_from_directory(
    base_dir + '/valid',
    target_size = img_size,
    batch_size = batch_size,
    class_mode = 'binary',
    shuffle = False
)

# Test-time pipeline: ResNet50 preprocessing only, no augmentation
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_dir = base_dir + '/test'

# Arguments shared by both iterators over the test split
test_flow_options = dict(
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary',
)

# Shuffled iterator over the test split (random batches)
test_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=True, **test_flow_options
)

# Unshuffled iterator over the same files, for the confusion matrix data
cfm_generator = test_datagen.flow_from_directory(
    test_dir, shuffle=False, **test_flow_options
)


In [None]:
# 3. Model Architecture
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models, optimizers

# Initialise the ResNet50 backbone: ImageNet weights, original classifier head
# removed. It is frozen so that only the new layers below are trained at first.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
base_model.trainable = False

# Custom classification head stacked on top of the backbone
classifier_stack = [
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    # One sigmoid unit: the output is a single probability per image
    layers.Dense(1, activation='sigmoid'),
]
model = models.Sequential(classifier_stack)

head_optimizer = optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=head_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# 4. Model Training
# First training run: 5 passes over the training generator, evaluated on the
# validation generator after every epoch. The returned History is kept in
# `history`.
history = model.fit(train_generator, epochs=5, validation_data=val_generator)

In [None]:
# 5. Fine Tuning
# Unfreeze the backbone, then freeze its first 140 layers again so that only
# the remaining layers (and the classification head) are updated.
base_model.trainable = True

for layer in base_model.layers[:140]:
  # Must be `layer` (the loop variable). The previous `layers.trainable = False`
  # only set an attribute on the imported keras `layers` module, so no layer
  # was actually frozen and the whole backbone was being fine-tuned.
  layer.trainable = False

# Recompile so the changed trainable flags are picked up by training. The
# learning rate is 100x smaller than in the first stage to avoid destroying
# the pretrained weights.
model.compile(optimizer = optimizers.Adam(learning_rate=0.00001),
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

history_fine = model.fit(
    train_generator,
    validation_data = val_generator,
    epochs = 5
)

In [None]:
# 6.1 Graph Visualisation
import matplotlib.pyplot as plt
import numpy as np

# Accuracy and loss of the fine-tuning run, side by side in ONE figure.
# (Previously each graph opened its own figure and both used subplot slot 1,
# which left the right half of both figures empty.)
plt.figure(figsize = (10,4))

# Accuracy Graph (left panel)
plt.subplot(1,2,1)
plt.plot(history_fine.history['accuracy'], label = 'Training Acc')
plt.plot(history_fine.history['val_accuracy'], label = 'Validation Acc')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.grid(True)

# Loss Graph (right panel)
plt.subplot(1,2,2)
plt.plot(history_fine.history['loss'], label = 'Training Loss')
plt.plot(history_fine.history['val_loss'], label = 'Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# 6.2 Prediction Visualisation
import matplotlib.pyplot as plt
import numpy as np

# Class index -> display name, taken from the generator itself.
# flow_from_directory numbers the class sub-folders in alphanumeric order, so
# hardcoding "1 means FAKE" silently mislabels the plot whenever the folder
# names sort the other way round (e.g. 'fake' -> 0, 'real' -> 1).
index_to_name = {idx: name.upper()
                 for name, idx in test_generator.class_indices.items()}

imgs, labels = next(test_generator)
predictions = model.predict(imgs)

plt.figure(figsize = (15,8))
# Show up to 10 images; min() guards against a batch shorter than 10
for i in range(min(10, len(imgs))):
  plt.subplot(2,5,i+1)

  # Undo ResNet preprocessing on a COPY (imgs[i] is a view into the batch, so
  # adding in place would corrupt `imgs`): add the per-channel means back,
  # then flip the channel order (BGR --> RGB).
  disp_img = imgs[i].copy()
  disp_img[:,:,0] += 103.939
  disp_img[:,:,1] += 116.779
  disp_img[:,:,2] += 123.68
  disp_img = disp_img[:,:,::-1]
  disp_img = np.clip(disp_img, 0, 255).astype('uint8')

  plt.imshow(disp_img)

  # The sigmoid output is the probability of the class with index 1
  score = predictions[i][0]
  pred_label = index_to_name[int(score > 0.5)]
  actual_label = index_to_name[int(labels[i])]

  # Green title = correct prediction, red title = wrong prediction
  col = 'green' if pred_label == actual_label else 'red'
  plt.title(f"Pred: {pred_label}\n({score:.2f})", color=col)
  plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# 7. Confusion Matrix
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# Class names ordered by the index the generator assigned to them, so that the
# axis labels and the report rows always match the 0/1 values in the data.
# (A hardcoded ['Real', 'Fake'] is wrong whenever the folders sort as
# 'fake' -> 0, 'real' -> 1.)
class_names = [name.capitalize()
               for name, _ in sorted(cfm_generator.class_indices.items(),
                                     key=lambda item: item[1])]

# Get Predictions (cfm_generator was created with shuffle=False)
preds = model.predict(cfm_generator)

# Convert to 0s and 1s, flattened from shape (N, 1) to (N,)
predictions = (preds > 0.5).astype(int).ravel()

# Get the Answer Key from the SAME generator that was predicted on
correct_answers = cfm_generator.classes

# Draw the Confusion Matrix
cm = confusion_matrix(correct_answers, predictions)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names)
plt.title('Confusion Matrix (Test Set)')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

# Print the Statistics
print(classification_report(correct_answers, predictions, target_names=class_names))

In [None]:
# 8. Reload/Upload weights
from tensorflow.keras.models import load_model
from google.colab import files
import os

# Pick a saved model file through the Colab upload dialog
uploaded = files.upload()

# `uploaded` is keyed by file name; without this guard an empty result would
# fail below with an unhelpful IndexError.
if not uploaded:
  raise RuntimeError("No file was uploaded, so there is no model to load.")

# Use the first uploaded file
filename = next(iter(uploaded))

# NOTE(review): only load model files from a trusted source - confirm where the
# file comes from. This also replaces any `model` already in memory.
model = load_model(filename)
