In [7]:
import os
import hashlib
from PIL import Image

def calculate_hash(image_path):

    #Calculate the hash of an image.
    with Image.open(image_path) as img:
        img = img.convert("RGB")  # Ensure the image is in RGB format
        img = img.resize((8, 8))  # Resize to reduce size and create hash
        hash_value = hashlib.md5(img.tobytes()).hexdigest()  # Create hash
    return hash_value

def find_and_remove_duplicates(folder_path):

    #Find and remove duplicate images in a given folder.

    #If cannot find path/ folder, Print that it does not exist
    if not os.path.exists(folder_path):

        print(f"The folder '{folder_path}' may not exist.")
        return

    print(f"Scanning folder: {folder_path}")

    hashes = {}
    duplicates = []

    for filename in os.listdir(folder_path):# for each file in the folder

        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):# if file is an image

            file_path = os.path.join(folder_path, filename) #generate a path to the specific image

            print(f"Processing file: {file_path}")  # Debug output

            img_hash = calculate_hash(file_path)

            if img_hash in hashes:
                duplicates.append(file_path)  # Found a duplicate
                print(f"Duplicate found: {file_path} (duplicate of {hashes[img_hash]})")
            else:
                hashes[img_hash] = file_path

    # Remove duplicates
    for duplicate in duplicates:

        os.remove(duplicate)
        print(f"Removed duplicate: {duplicate}")

    if not duplicates:
        print("No duplicates found.")

if __name__ == '__main__':
    folder = input("Enter the path to the folder containing photos: ")
    find_and_remove_duplicates(folder)

Enter the path to the folder containing photos:  


The folder '' may not exist.


In [8]:
# Model code starts here
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import load_img, img_to_array  # Correct import
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input

# Create an ImageDataGenerator to rescale pixel values
datagen = ImageDataGenerator(rescale=1.0/255.0)  

# Load the images from folders
train_data = datagen.flow_from_directory(
    '/Users/keirajames/Desktop/archive-2/train/',  # REPLACE THIS FOR YOUR DIRECTORY
    target_size=(64, 64),  # Resize images to 64x64 pixels
    batch_size=32,         # Load images in batches of 32
    class_mode='binary'    # Since we have two types (fake vs real) use binary mode
)

Found 96636 images belonging to 2 classes.


In [9]:
# Build model
model = Sequential()

# Add an explicit Input layer
model.add(Input(shape=(64, 64, 3)))  # Input layer specifying the image size and 3 channels (RGB)

# First convolutional layer
model.add(Conv2D(32, (3, 3), activation='relu'))  # No need for input_shape here
model.add(MaxPooling2D(pool_size=(2, 2)))  # Max pooling to reduce size

# Second convolutional layer
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the feature maps into a 1D vector
model.add(Flatten())

# Fully connected layer
model.add(Dense(128, activation='relu'))

# Output layer (binary classification: fake vs real)
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Summary of the model
#model.summary()

In [10]:
# Train the model
model.fit(train_data, epochs=10)  

Epoch 1/8


  self._warn_if_super_not_called()


[1m3020/3020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m277s[0m 91ms/step - accuracy: 0.8289 - loss: 0.3741
Epoch 2/8
[1m3020/3020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 86ms/step - accuracy: 0.9249 - loss: 0.1902
Epoch 3/8
[1m3020/3020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 86ms/step - accuracy: 0.9434 - loss: 0.1468
Epoch 4/8
[1m3020/3020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m279s[0m 92ms/step - accuracy: 0.9570 - loss: 0.1123
Epoch 5/8
[1m3020/3020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m288s[0m 95ms/step - accuracy: 0.9677 - loss: 0.0849
Epoch 6/8
[1m3020/3020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m291s[0m 96ms/step - accuracy: 0.9761 - loss: 0.0634
Epoch 7/8
[1m3020/3020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m274s[0m 91ms/step - accuracy: 0.9822 - loss: 0.0496
Epoch 8/8
[1m3020/3020[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1188s[0m 393ms/step - accuracy: 0.9864 - loss: 0.0379


<keras.src.callbacks.history.History at 0x144273ef0>

In [21]:
'''
from PIL import Image
import numpy as np

def preprocess_image(image_path, target_size=(224, 224)):
   
    # Load the image
    image = Image.open(image_path)

    # Resize the image to target size
    image = image.resize(target_size)

    # Convert the image to RGB (in case it's not)
    image = image.convert("RGB")

    # Convert the image to a numpy array
    image_array = np.array(image)

    # Normalize the image data to [0, 1]
    image_array = image_array / 255.0

    # If your model expects batches of images, add a batch dimension
    image_array = np.expand_dims(image_array, axis=0)

    return image_array

# Example usage
image_path = '/Users/keirajames/Desktop/archive-2/test/photos/HNI_0017.jpg'
preprocessed_image = preprocess_image(image_path)
'''

In [16]:
# Test images here
def predict_images_from_directory(directory):
    results = []
    fake_count = 0
    total = 0
    
    if not os.path.exists(directory):
        print(f"Directory {directory} does not exist.")
        return []

    for filename in os.listdir(directory):
        if filename.endswith(('.jpg', '.jpeg', '.png')):
            
            img_path = os.path.join(directory, filename)
            img = load_img(img_path, target_size=(64, 64))  # Use load_img from keras
            img = img_to_array(img)  # Convert image to array
            img = np.expand_dims(img, axis=0)
            img = img / 255.0  # Normalize  image
            
            prediction = model.predict(img)

            if prediction[0][0] >= 0.7:
                results.append((filename, 'Real'))
                
            else:
                results.append((filename, 'Fake'))
                fake_count = fake_count + 1
            
            total = total + 1
    

    print("Out of ",total , ", ", fake_count, "were read as fake.")
    #return results
    
# Path to your directory 
new_image_directory = '/Users/keirajames/Desktop/archive-2/test/REAL/'

# Get predictions
predictions = predict_images_from_directory(new_image_directory)

#for filename, label in predictions:
#    print(f"{filename}: {label}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24

In [29]:
def predict_images_from_directory(directory):
    results = []
    real_count = 0
    total = 0
    
    if not os.path.exists(directory):
        print(f"Directory {directory} does not exist.")
        return []

    for filename in os.listdir(directory):
        if filename.endswith(('.jpg', '.jpeg', '.png')):
            
            img_path = os.path.join(directory, filename)
            img = load_img(img_path, target_size=(64, 64))  # Use load_img from keras
            img = img_to_array(img)  # Convert image to array
            img = np.expand_dims(img, axis=0)
            img = img / 255.0  # Normalize  image
            
            prediction = model.predict(img)

            if prediction[0][0] >= 0.8:
                results.append((filename, 'Real'))
                real_count = real_count + 1
            else:
                results.append((filename, 'Fake'))
                
            
            total = total + 1
            
    

    print("Out of ",total , ", ", real_count, "were read as real.")
    return results
    
# Path to your directory 
new_image_directory = '/Users/keirajames/Desktop/archive-2/test/photo/'

# Get predictions
predictions = predict_images_from_directory(new_image_directory)

for filename, label in predictions:
    print(f"{filename}: {label}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Out of  6 ,  4 were read as real.
0070 (8).jpg: Fake
0060 (3).jpg: Real
0065 (2).jpg: Real
0065 (9).jpg: Fake
0064 (5).jpg: Real
0067 (3).jpg: Real
