In [None]:
import numpy as np
import os
import hashlib
import tensorflow as tf
from PIL import Image
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import load_img, img_to_array  # Correct import
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dropout, Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model


In [None]:
# Report which GPU devices TensorFlow can see, if any.
gpus = tf.config.list_physical_devices('GPU')
print(f"GPUs available: {gpus}" if gpus else "No GPU found.")

In [None]:
import os
import hashlib
from PIL import Image

def calculate_hash(image_path):
    """Return an MD5 hex digest of the image's 8x8 RGB thumbnail.

    The image is converted to RGB and resized to 8x8 before hashing, so two
    files get the same digest exactly when those thumbnails are
    byte-identical.

    Args:
        image_path: Path of the image file to open.

    Returns:
        The hexadecimal MD5 digest as a string.
    """
    with Image.open(image_path) as source_image:
        # Normalise the colour mode and shrink, then grab the raw pixel bytes
        # while the file is still open.
        thumbnail = source_image.convert("RGB").resize((8, 8))
        pixel_bytes = thumbnail.tobytes()
    return hashlib.md5(pixel_bytes).hexdigest()

def find_and_remove_duplicates(folder_path):
    """Delete images in ``folder_path`` whose hash matches an earlier image.

    Only the folder's direct entries are scanned (no recursion), and only
    names ending in .png, .jpg, .jpeg, .gif or .bmp (case-insensitive) are
    considered. Entries are visited in sorted name order, so within each
    group of matching images the first name in that order is kept and the
    others are deleted from disk. Progress is reported with ``print``.

    Args:
        folder_path: Path of the folder to scan.

    Returns:
        None. If ``folder_path`` does not exist or is not a directory, a
        message is printed and nothing is scanned or deleted.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    # If the path cannot be found, say so and stop.
    if not os.path.exists(folder_path):
        print(f"The folder '{folder_path}' may not exist.")
        return

    # os.listdir() raises on a regular file, so reject non-directories up front.
    if not os.path.isdir(folder_path):
        print(f"'{folder_path}' is not a folder.")
        return

    print(f"Scanning folder: {folder_path}")

    hashes = {}      # hash -> path of the first file seen with that hash
    duplicates = []  # paths whose hash was already present in `hashes`

    # os.listdir() returns entries in arbitrary order; sorting makes the choice
    # of which copy is kept deterministic.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(image_extensions):
            continue  # not an image by extension

        file_path = os.path.join(folder_path, filename)
        print(f"Processing file: {file_path}")  # Debug output

        try:
            img_hash = calculate_hash(file_path)
        except (OSError, ValueError) as error:
            # A single unreadable or corrupt file should not abort the scan.
            print(f"Skipping unreadable image: {file_path} ({error})")
            continue

        if img_hash in hashes:
            duplicates.append(file_path)  # Found a duplicate
            print(f"Duplicate found: {file_path} (duplicate of {hashes[img_hash]})")
        else:
            hashes[img_hash] = file_path

    # Remove duplicates only after the whole folder has been scanned.
    for duplicate in duplicates:
        os.remove(duplicate)
        print(f"Removed duplicate: {duplicate}")

    if not duplicates:
        print("No duplicates found.")

if __name__ == '__main__':
    # In a notebook kernel __name__ is '__main__', so this prompt appears
    # every time the cell is executed.
    folder = input("Enter the path to the folder containing photos: ")
    # Pasted paths often carry stray whitespace or wrapping quotes, and the
    # os.path checks do not expand '~' on their own.
    folder = os.path.expanduser(folder.strip().strip('"\''))
    find_and_remove_duplicates(folder)

In [None]:
# Model code starts here

def augment_image(image, label):
    """Randomly flip and brightness-jitter an image; the label passes through.

    Args:
        image: Image tensor handed to the ``tf.image`` random ops.
        label: Label paired with the image; returned unchanged.

    Returns:
        A ``(augmented_image, label)`` tuple.
    """
    # NOTE(review): nothing in the visible cells maps this function over the
    # training dataset -- confirm whether augmentation is meant to be applied.
    flipped = tf.image.random_flip_left_right(image)
    # NOTE(review): max_delta is an additive offset; confirm 0.1 is meaningful
    # for the pixel value range of the images passed in.
    jittered = tf.image.random_brightness(flipped, max_delta=0.1)
    return jittered, label

# Load the training images as a shuffled, batched dataset of 224x224 images.
train_dir = '/Users/tesne/Desktop/ctp_project/archive/train'
train_data = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    shuffle=True,
    batch_size=32,
    image_size=(224, 224),
)

In [None]:
# ImageNet-pretrained EfficientNetB0 backbone without its classification top.
base_model = EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Custom classification head, applied in this order on the backbone output.
head_layers = [
    GlobalAveragePooling2D(),         # Global average pooling
    Dense(128, activation='relu'),    # Fully connected layer
    Dropout(0.5),                     # Dropout against overfitting; tune or remove based on results
    Dense(1, activation='sigmoid'),   # Single sigmoid unit for binary classification
]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)

# Assemble, compile and summarise the final model.
model = Model(inputs=base_model.input, outputs=x)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()



In [None]:
# Fit on the training dataset for five epochs, then write the model to the .h5 path.
trained_model_path = '/Users/tesne/Desktop/EfficientNet_Training/efficientNet_model1.h5'
model.fit(x=train_data, epochs=5)
model.save(trained_model_path)

In [None]:
# Test images here
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Load the test dataset
test_data = tf.keras.utils.image_dataset_from_directory(
    '/Users/tesne/Desktop/ctp_project/archive/test2',  # Your test directory
    image_size=(224, 224),
    batch_size=32,
    shuffle=False  # Important: a fixed order keeps predictions aligned with the labels
)

# Class names in label-index order, as reported by the dataset itself, so the
# plot labels cannot drift out of sync with the folder names.
class_names = test_data.class_names

# Make predictions
predictions = model.predict(test_data)

# Convert predictions to binary labels; flatten to 1-D so they line up with
# the label vector built below.
predicted_classes = (predictions > 0.5).astype("int32").ravel()

# Get true labels from the dataset. The tf.data.Dataset returned by
# image_dataset_from_directory has no `.classes` attribute (that belongs to
# ImageDataGenerator iterators), so collect the labels batch by batch. This is
# only valid because shuffle=False gives the same order as model.predict().
true_classes = np.concatenate([labels.numpy() for _, labels in test_data]).ravel()

# Create confusion matrix; passing `labels` pins one row/column per class even
# if a class never occurs in the labels or the predictions.
cm = confusion_matrix(
    true_classes,
    predicted_classes,
    labels=list(range(len(class_names))),
)

# Normalize each row by its true-class count (useful for imbalanced datasets).
# Empty rows are divided by 1 instead of 0 so they stay all-zero rather than NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = cm.astype('float') / np.maximum(row_totals, 1)

# Plot confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm_normalized, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.show()

