<a href="https://colab.research.google.com/github/ramaastra/sekarya-machine-learning/blob/main/sekarya_model_with_kfold.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# dataset stored on Drive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# List the dataset folder on the mounted Drive. The path is relative, so it is
# resolved against the current working directory.
# NOTE(review): the saved output of this cell reports that the path was not
# found -- presumably it was last run before the mount was ready or from a
# different working directory; confirm by re-running after mounting.
!ls drive/MyDrive/New-Sekarya-Dataset/

ls: cannot access 'drive/MyDrive/New-Sekarya-Dataset/': No such file or directory


In [None]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import img_to_array, load_img
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
import numpy as np
import random
import os

In [None]:
# Root folder of the dataset on the mounted Drive. A plain string literal is
# used because the path contains nothing to interpolate.
dataset_dir = 'drive/MyDrive/New-Sekarya-Dataset/'

# Display the top-level entries of the dataset folder.
os.listdir(dataset_dir)

['train', 'test']

In [None]:
# Folder holding the training split; its entries are displayed as the cell
# output and are later used as the class labels.
train_dir = os.path.join(dataset_dir, 'train')
os.listdir(train_dir)

['ai_generated', 'non_ai_generated']

In [None]:
# Locations of the two training classes and of the test split.
test_dir = os.path.join(dataset_dir, 'test')
train_real_dir = os.path.join(train_dir, 'non_ai_generated')
train_fake_dir = os.path.join(train_dir, 'ai_generated')

# Report how many entries each folder contains, one line per folder.
for folder, description in (
  (train_fake_dir, 'images of fake images for training'),
  (train_real_dir, 'images of real images for training'),
  (test_dir, 'images for testing'),
):
  print(f'There are {len(os.listdir(folder))} {description}.\n')

There are 331 images of fake images for training.

There are 2501 images of real images for training.

There are 2192 images for testing.



In [None]:
# Collect the path of every training image together with its class label
# (the name of the sub-folder it sits in) so the pairs can be split with
# K-Fold.
images = []
labels = []

# os.listdir returns entries in arbitrary order. Sorting both the class
# folders and the files inside them gives a stable sample order, which is
# what makes the seeded K-Fold split produce the same folds on every run.
# Entries that are not directories are ignored at the class level so a stray
# file cannot be mistaken for a class.
class_labels = sorted(
  entry for entry in os.listdir(train_dir)
  if os.path.isdir(os.path.join(train_dir, entry))
)
for class_label in class_labels:
  class_data_path = os.path.join(train_dir, class_label)
  for filename in sorted(os.listdir(class_data_path)):
    file_path = os.path.join(class_data_path, filename)
    # Skip nested folders: only regular files can be samples.
    if not os.path.isfile(file_path):
      continue
    images.append(file_path)
    labels.append(class_label)

print(f'There are {len(images)} images will be splitted with K-Fold.\n')

There are 2832 images will be splitted with K-Fold.



In [None]:
def cnn_model(input_shape=(224, 224, 3), learning_rate=1e-4):
  """Build and compile a small CNN for binary image classification.

  The network stacks two Conv2D + MaxPooling2D stages, flattens the result,
  and feeds it through two hidden Dense layers into a single sigmoid unit.
  It is compiled with the Adam optimizer, binary cross-entropy loss and the
  accuracy metric.

  Args:
    input_shape: Shape of one input image as (height, width, channels).
      Defaults to (224, 224, 3).
    learning_rate: Learning rate handed to the Adam optimizer.
      Defaults to 1e-4.

  Returns:
    The compiled tf.keras.Sequential model.
  """
  model = tf.keras.Sequential([
    # Declare the input with an explicit Input object rather than passing
    # `input_shape` to the first layer, which newer Keras releases warn about.
    tf.keras.Input(shape=input_shape),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    # Single sigmoid output to pair with the binary cross-entropy loss.
    tf.keras.layers.Dense(1, activation='sigmoid'),
  ])

  model.compile(optimizer=Adam(learning_rate=learning_rate),
                loss=BinaryCrossentropy(),
                metrics=['accuracy'])

  return model

In [None]:
# Callback that watches the validation loss, stops training after 5 epochs
# without improvement, and puts the best-epoch weights back on the model.
early_stopping = EarlyStopping(
  monitor='val_loss',
  mode='auto',
  patience=5,
  restore_best_weights=True,
)

In [None]:
# Number of cross-validation folds.
num_folds = 6

# Stratified splitter; shuffling uses a fixed seed.
kfold = StratifiedKFold(
  n_splits=num_folds,
  shuffle=True,
  random_state=128,
)

In [None]:
# Stratified K-Fold cross-validation: for every fold a fresh CNN is trained on
# the training split, evaluated on the validation split, saved to disk, and its
# validation loss and accuracy are recorded. The averages over all folds are
# printed at the end.
accuracies = []
losses = []

# The models are written to this folder; create it up front because saving
# into a directory that does not exist fails.
models_dir = '/content/models'
os.makedirs(models_dir, exist_ok=True)

# The generators only rescale pixel values to [0, 1] and hold no per-fold
# state, so one pair is shared by all folds.
train_datagen = ImageDataGenerator(rescale=1./255.0)
val_datagen = ImageDataGenerator(rescale=1./255.0)

# NOTE(review): the class counts printed earlier in this notebook are heavily
# imbalanced, so plain accuracy can look good for a model that mostly predicts
# the majority class. Consider passing `class_weight` to `fit` or tracking
# precision/recall -- confirm against the data before relying on accuracy.
for k, (train_indices, val_indices) in enumerate(kfold.split(images, labels)):
  print('==============================================================')
  print(f'[Processing Fold-{k}...]')

  # Release the Keras global state left behind by the previous fold's model so
  # memory use does not keep growing while models are created in a loop.
  tf.keras.backend.clear_session()

  # Creating lists for images and labels based on the train and val indices
  x_train = [images[i] for i in train_indices]
  y_train = [labels[i] for i in train_indices]
  x_val = [images[i] for i in val_indices]
  y_val = [labels[i] for i in val_indices]

  # Creating dataframe for each train and val list
  train_df = pd.DataFrame({
    'image': x_train,
    'label': y_train
  })
  val_df = pd.DataFrame({
    'image': x_val,
    'label': y_val
  })

  # Creating the batch iterators that load and resize the images
  train_generator = train_datagen.flow_from_dataframe(train_df,
                                                      x_col='image',
                                                      y_col='label',
                                                      target_size=(224, 224),
                                                      batch_size=64,
                                                      color_mode='rgb',
                                                      class_mode='binary')

  val_generator = val_datagen.flow_from_dataframe(val_df,
                                                  x_col='image',
                                                  y_col='label',
                                                  target_size=(224, 224),
                                                  batch_size=64,
                                                  color_mode='rgb',
                                                  class_mode='binary')

  # Train the model for this fold
  model = cnn_model()
  history = model.fit(train_generator,
                      epochs=10,
                      validation_data=val_generator,
                      verbose=1,
                      callbacks=[early_stopping])

  # Evaluate the model on the validation set; with the single 'accuracy'
  # metric, evaluate returns the loss followed by the accuracy.
  val_loss, accuracy = model.evaluate(val_generator)
  print(f'\nValidation Accuracy for Fold-{k}: {accuracy}')

  # Save the model into directory
  model.save(os.path.join(models_dir, f'model-{k}.h5'))

  # Store the validation accuracy and validation loss for this fold. The loss
  # comes from the same evaluation as the accuracy, so both numbers describe
  # the model that was actually saved (early stopping may have restored the
  # weights of an earlier epoch, which the last-epoch training loss would not
  # reflect).
  accuracies.append(accuracy)
  losses.append(val_loss)

  print('==============================================================\n\n')

# Calculate average accuracy and loss across the folds that were recorded
average_accuracy = sum(accuracies) / len(accuracies)
average_loss = sum(losses) / len(losses)

print(f'Average accuracy: {average_accuracy:.4f}')
print(f'Average loss: {average_loss:.4f}')

[Processing Fold-0...]
Found 2360 validated image filenames belonging to 2 classes.
Found 472 validated image filenames belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Validation Accuracy for Fold-0: 0.8813559412956238


[Processing Fold-1...]
Found 2360 validated image filenames belonging to 2 classes.
Found 472 validated image filenames belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Validation Accuracy for Fold-1: 0.8792372941970825


[Processing Fold-2...]
Found 2360 validated image filenames belonging to 2 classes.
Found 472 validated image filenames belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10

Validation Accuracy for Fold-2: 0.8771186470985413


[Processing Fold-3...]
Found 2360 validated image filenames belonging to 