In [4]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
import shutil

# Constants
IMAGE_WIDTH, IMAGE_HEIGHT = 150, 150
NUM_CLASSES = 9

# Load and preprocess the dataset
def load_dataset(data_dir):
    """Load all images below ``data_dir`` together with integer class labels.

    Every immediate subdirectory of ``data_dir`` is treated as one category.
    Categories are sorted by name and numbered consecutively from 0, so the
    label assigned to a category is reproducible across runs and platforms
    (``os.listdir`` returns entries in arbitrary order) and is not shifted by
    stray non-directory entries sitting next to the category folders.

    Args:
        data_dir: Path of the directory that contains one folder per category.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` stacks the images after they
        have been resized to ``(IMAGE_WIDTH, IMAGE_HEIGHT)`` and converted to
        RGB; ``y`` holds the category index of each image, in the same order.

    Raises:
        OSError: If a directory cannot be listed or a file cannot be opened
            as an image by Pillow.
    """
    # Only directories are categories; filter first so labels have no gaps.
    categories = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )

    X, y = [], []
    for label, category in enumerate(categories):
        category_path = os.path.join(data_dir, category)
        for image_name in sorted(os.listdir(category_path)):
            image_path = os.path.join(category_path, image_name)
            # The context manager releases the file handle as soon as the
            # pixels are read. Resize happens before the RGB conversion,
            # matching the preprocessing order used at prediction time.
            with Image.open(image_path) as raw:
                img = raw.resize((IMAGE_WIDTH, IMAGE_HEIGHT)).convert("RGB")
            X.append(np.array(img))
            y.append(label)
    return np.array(X), np.array(y)

# Create the CNN model
def create_model():
    """Build and compile the image-classification CNN.

    The network stacks three Conv2D + MaxPooling2D stages (32, 64 and 128
    filters), flattens the result, and feeds it through a 256-unit dense
    layer with 50% dropout into a softmax layer with ``NUM_CLASSES`` outputs.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    layer_stack = [
        # Convolutional feature extractor
        Conv2D(32, kernel_size=(3, 3), activation='relu',
               input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, 3)),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(128, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Classifier head
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(NUM_CLASSES, activation='softmax'),
    ]
    network = Sequential(layer_stack)
    network.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# Main function
if __name__ == "__main__":
    # Step 1: Organize the dataset in separate folders for each category

    # Step 2: Load and preprocess the dataset
    data_dir = "C:\\Users\\Prathiksai\\Downloads\\correct photo detection"
    X, y = load_dataset(data_dir)
    if len(X) == 0:
        # Fail early with a clear message instead of an obscure error from
        # the split or the training loop further down.
        raise ValueError(f"No images found under {data_dir!r}")
    X = X.astype('float32') / 255.0  # Normalize pixel values between 0 and 1
    y = to_categorical(y, NUM_CLASSES)  # One-hot encode the labels

    # Step 3: Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Data Augmentation: random geometric transforms applied to training batches
    data_augmentation = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    # Step 4: Create and train the CNN model with data augmentation
    # The batch size is named once so the generator and the step count
    # cannot drift apart.
    batch_size = 32
    model = create_model()
    model.fit(data_augmentation.flow(X_train, y_train, batch_size=batch_size),
              # At least one step per epoch, even with fewer training
              # images than one full batch.
              steps_per_epoch=max(1, len(X_train) // batch_size),
              epochs=20,
              validation_data=(X_test, y_test))

    # Save the trained model
    model.save("C:\\Users\\Prathiksai\\Downloads\\saved")
   

    # Step 5: Use the trained model to classify new images and sort them into Valid/Invalid folders
# Define the custom folder names for invalid image categories, keyed by the
# class index predicted by the model.
# NOTE(review): assumes these indices match the label indices used when the
# model was trained — confirm against the training data folders.
invalid_folder_names = {
    0: "blurred_image",
    1: "facing_sideway",
    2: "inverted_image",
    3: "masked_pics",
    4: "signature",
    5: "small_sized",
    6: "with_spectacles",
    7: "zoomed_image"
    # Add more categories as needed
}


def _load_image_as_batch(img_path):
    """Return the image at ``img_path`` as a model-ready batch of one.

    The image is resized to ``(IMAGE_WIDTH, IMAGE_HEIGHT)``, converted to RGB,
    scaled to the 0-1 range and given a leading batch dimension.
    """
    # Close the file handle before returning so the caller can move the file
    # afterwards (an open handle blocks the move on Windows).
    with Image.open(img_path) as raw:
        rgb = raw.resize((IMAGE_WIDTH, IMAGE_HEIGHT)).convert("RGB")
    return np.expand_dims(np.array(rgb) / 255.0, axis=0)  # Add batch dimension


# Guarded so that importing this module does not start moving files; `model`
# is only created when the file runs as a script.
if __name__ == "__main__":
    # Assuming you have a list of image file names corresponding to the images in the new dataset
    # If not, you can extract the image names from the directory while loading the new dataset
    new_data_dir = "C:\\Users\\Prathiksai\\Downloads\\New folder\\path_to_destination_folder_for_correct_images\\small_sized"
    image_file_names = os.listdir(new_data_dir)

    # Create the output folders up front; moving into a folder that does not
    # exist fails.
    invalid_output_folder = "C:\\Users\\Prathiksai\\Downloads\\New folder\\output_images\\Invalid"
    os.makedirs(invalid_output_folder, exist_ok=True)
    valid_output_folder = "C:\\Users\\Prathiksai\\Downloads\\New folder\\output_images\\Valid"
    os.makedirs(valid_output_folder, exist_ok=True)

    # An image is treated as invalid when the top class probability is below
    # this value.
    # NOTE(review): this flags images the model is *unsure* about, not images
    # predicted to belong to an invalid class — confirm this is intended.
    threshold = 0.5

    # Initialize a list to store the results
    validity = []

    # Determine the predicted class for each image and move to the corresponding folder
    for img_file_name in image_file_names:
        img_path = os.path.join(new_data_dir, img_file_name)
        if not os.path.isfile(img_path):
            continue  # Skip subdirectories; only files can be images

        # Load and preprocess the new image
        img = _load_image_as_batch(img_path)

        # Make predictions for the single image
        prediction = model.predict(img)

        # Get the probability of the predicted class
        pred_prob = np.max(prediction)
        is_invalid = pred_prob < threshold

        if is_invalid:
            predicted_class_index = int(np.argmax(prediction))
            # Unmapped classes go to a catch-all folder, and the reported
            # status names that same folder so the CSV matches the disk.
            valid_status = invalid_folder_names.get(predicted_class_index, "unknown_invalid_category")
            destination_folder = os.path.join(invalid_output_folder, valid_status)
            os.makedirs(destination_folder, exist_ok=True)
        else:
            valid_status = "Valid"
            destination_folder = valid_output_folder

        # Move the image to the corresponding folder
        shutil.move(img_path, os.path.join(destination_folder, img_file_name))

        # Add the result to the validity list
        validity.append((img_file_name, valid_status, pred_prob))

        # Print the result
        print(f"Image: {img_file_name}, Validity: {valid_status}, Probability: {pred_prob:.2f}")

    # Save the results as a CSV file
    result_df = pd.DataFrame(validity, columns=['Image Name', 'Validity', 'Probability'])
    result_df.to_csv("C:\\Users\\Prathiksai\\Downloads\\New folder\\output_images\\results.csv", index=False)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
INFO:tensorflow:Assets written to: C:\Users\Prathiksai\Downloads\saved\assets


INFO:tensorflow:Assets written to: C:\Users\Prathiksai\Downloads\saved\assets


Image: image012.png, Validity: Valid, Probability: 0.65
Image: image015.png, Validity: Valid, Probability: 0.84
Image: image030.png, Validity: Valid, Probability: 0.87
Image: image040.png, Validity: signature, Probability: 0.45
Image: image050.png, Validity: Valid, Probability: 0.70
Image: image051.png, Validity: Valid, Probability: 0.76
Image: image060.png, Validity: Valid, Probability: 0.79
Image: image069.png, Validity: Valid, Probability: 0.58
Image: image079.png, Validity: Valid, Probability: 0.73
Image: image091.png, Validity: signature, Probability: 0.47
Image: image103.png, Validity: Valid, Probability: 0.78
Image: image129.png, Validity: Valid, Probability: 0.80
Image: image137.png, Validity: Valid, Probability: 0.74
Image: image139.png, Validity: Valid, Probability: 0.66
Image: image143.png, Validity: Valid, Probability: 0.60
Image: image150.png, Validity: Valid, Probability: 0.87
Image: image151.png, Validity: Valid, Probability: 0.54
Image: image152.png, Validity: Valid, Pr