In [1]:
import os
import hashlib
from PIL import Image

def calculate_hash(image_path):

    #Calculate the hash of an image.
    with Image.open(image_path) as img:
        img = img.convert("RGB")  # Ensure the image is in RGB format
        img = img.resize((8, 8))  # Resize to reduce size and create hash
        hash_value = hashlib.md5(img.tobytes()).hexdigest()  # Create hash
    return hash_value

def find_and_remove_duplicates(folder_path):

    #Find and remove duplicate images in a given folder.

    #If cannot find path/ folder, Print that it does not exist
    if not os.path.exists(folder_path):

        print(f"The folder '{folder_path}' may not exist.")
        return

    print(f"Scanning folder: {folder_path}")

    hashes = {}
    duplicates = []

    for filename in os.listdir(folder_path):# for each file in the folder

        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):# if file is an image

            file_path = os.path.join(folder_path, filename) #generate a path to the specific image

            print(f"Processing file: {file_path}")  # Debug output

            img_hash = calculate_hash(file_path)

            if img_hash in hashes:
                duplicates.append(file_path)  # Found a duplicate
                print(f"Duplicate found: {file_path} (duplicate of {hashes[img_hash]})")
            else:
                hashes[img_hash] = file_path

    # Remove duplicates
    for duplicate in duplicates:

        os.remove(duplicate)
        print(f"Removed duplicate: {duplicate}")

    if not duplicates:
        print("No duplicates found.")

if __name__ == '__main__':
    folder = input("Enter the path to the folder containing photos: ")
    find_and_remove_duplicates(folder)

Enter the path to the folder containing photos:  


The folder '' may not exist.


In [2]:
# Model code starts here
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import load_img, img_to_array  # Correct import
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input

# Create an ImageDataGenerator to rescale pixel values
datagen = ImageDataGenerator(rescale=1.0/255.0)  

# Load the images from folders
train_data = datagen.flow_from_directory(
    '/Users/keirajames/Desktop/archive-2/train/',  # REPLACE THIS FOR YOUR DIRECTORY
    target_size=(64, 64),  # Resize images to 64x64 pixels
    batch_size=32,         # Load images in batches of 32
    class_mode='binary'    # Since we have two types (fake vs real) use binary mode
)

2024-10-14 12:55:43.270694: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Found 98160 images belonging to 2 classes.


In [3]:
# Build model
model = Sequential()

# Add an explicit Input layer
model.add(Input(shape=(64, 64, 3)))  # Input layer specifying the image size and 3 channels (RGB)

# First convolutional layer
model.add(Conv2D(32, (3, 3), activation='relu'))  # No need for input_shape here
model.add(MaxPooling2D(pool_size=(2, 2)))  # Max pooling to reduce size

# Second convolutional layer
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the feature maps into a 1D vector
model.add(Flatten())

# Fully connected layer
model.add(Dense(128, activation='relu'))

# Output layer (binary classification: fake vs real)
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Summary of the model
#model.summary()

In [4]:
# Train the model
model.fit(train_data, epochs=5)  

  self._warn_if_super_not_called()


Epoch 1/5
[1m3068/3068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m306s[0m 98ms/step - accuracy: 0.8297 - loss: 0.3660
Epoch 2/5
[1m3068/3068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m306s[0m 100ms/step - accuracy: 0.9269 - loss: 0.1826
Epoch 3/5
[1m3068/3068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33245s[0m 11s/step - accuracy: 0.9472 - loss: 0.1343
Epoch 4/5
[1m3068/3068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m651s[0m 212ms/step - accuracy: 0.9626 - loss: 0.0993
Epoch 5/5
[1m3068/3068[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m663s[0m 216ms/step - accuracy: 0.9747 - loss: 0.0683


<keras.src.callbacks.history.History at 0x1480e5880>

In [5]:
# Test images here
def predict_images_from_directory(directory):
    results = []

    if not os.path.exists(directory):
        print(f"Directory {directory} does not exist.")
        return []

    for filename in os.listdir(directory):
        if filename.endswith(('.jpg', '.jpeg', '.png')):
            img_path = os.path.join(directory, filename)
            img = load_img(img_path, target_size=(64, 64))  # Use load_img from keras
            img = img_to_array(img)  # Convert image to array
            img = np.expand_dims(img, axis=0)
            img = img / 255.0  # Normalize  image
            
            prediction = model.predict(img)

            if prediction[0][0] >= 0.5:
                results.append((filename, 'Real'))
            else:
                results.append((filename, 'Fake'))

    return results
    
# Path to your directory 
new_image_directory = '/Users/keirajames/Desktop/archive-2/test/photos/'

# Get predictions
predictions = predict_images_from_directory(new_image_directory)

for filename, label in predictions:
    print(f"{filename}: {label}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 248ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 153ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
0000 (3).jpg: Real
0000 (2).jpg: Real
7 (9).jpg: Real
0000 (5).jpg: Real
0000 (4).jpg: Fake
7 (10).jpg: Fake
7.jpg: Fake
