In [None]:
# Step 1: Mount Google Drive and import necessary libraries
from google.colab import drive
import os
import cv2
import numpy as np
import pandas as pd
from scipy.ndimage import variance
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.utils import resample

# Mount Google Drive at /content/drive; the dataset path and the report/export
# paths used later in this notebook all live under this mount point.
drive.mount('/content/drive')


MessageError: Error: credential propagation was unsuccessful

In [None]:
# Feature extraction functions
def calculate_exposure_score(image):
    """Return the entropy (in bits) of the image's grayscale histogram.

    The BGR image is converted to grayscale, binned into a 256-bin intensity
    histogram, and normalised so the bins sum to one. The score is
    ``-sum(p * log2(p + 1e-7))`` over those bins.

    Args:
        image: BGR image array (as produced by ``cv2.imread``).

    Returns:
        The histogram entropy as a NumPy scalar.
    """
    intensity = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    bin_counts = cv2.calcHist([intensity], [0], None, [256], [0, 256])
    total = bin_counts.sum()
    probabilities = bin_counts.reshape(-1) / total
    # The small epsilon keeps log2 finite for bins that are empty (p == 0).
    weighted_logs = probabilities * np.log2(probabilities + 1e-7)
    return -np.sum(weighted_logs)

def calculate_focus_score(image):
    """Return the variance of the Laplacian of the grayscale image.

    The Laplacian is computed in 64-bit floating point; a larger variance
    means a stronger second-derivative (edge) response across the frame.

    Args:
        image: BGR image array (as produced by ``cv2.imread``).

    Returns:
        The variance of the Laplacian response.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    laplacian_response = cv2.Laplacian(grayscale, cv2.CV_64F)
    return laplacian_response.var()

def calculate_composition_score(image):
    """Score how closely the centre of the frame matches the overall brightness.

    The score is ``1 - |central - overall| / overall`` where ``central`` is the
    mean grayscale brightness of the middle of the frame (30%-70% on each
    axis) and ``overall`` is the mean over the whole frame. It equals 1.0 when
    both means agree and decreases (possibly below zero) as they diverge.

    Args:
        image: BGR image array (as produced by ``cv2.imread``).

    Returns:
        The composition score. A completely black frame (overall brightness of
        zero) scores 1.0 rather than the NaN a 0/0 division would produce.
    """
    # Convert once and crop the grayscale result; the conversion is per-pixel,
    # so this matches converting the cropped colour region separately.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape[:2]
    central_region = gray[int(h * 0.3):int(h * 0.7), int(w * 0.3):int(w * 0.7)]
    if central_region.size == 0:
        # A 1-pixel-wide/high frame yields an empty crop; use the whole frame.
        central_region = gray

    overall_brightness = np.mean(gray)
    if overall_brightness == 0:
        # Every pixel is black, so the centre cannot differ from the whole.
        return 1.0

    central_brightness = np.mean(central_region)
    return 1 - abs(central_brightness - overall_brightness) / overall_brightness

def calculate_clarity_score(image):
    """Return the mean Sobel gradient magnitude of the grayscale image.

    Horizontal and vertical first derivatives are computed with a 5x5 Sobel
    kernel in 64-bit floating point, combined per pixel as
    ``sqrt(gx**2 + gy**2)``, and averaged over the frame.

    Args:
        image: BGR image array (as produced by ``cv2.imread``).

    Returns:
        The mean gradient magnitude.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # (dx, dy) = (1, 0) is the x-derivative, (0, 1) the y-derivative.
    grad_x, grad_y = [
        cv2.Sobel(grayscale, cv2.CV_64F, dx, dy, ksize=5)
        for dx, dy in ((1, 0), (0, 1))
    ]
    magnitude = np.sqrt(np.square(grad_x) + np.square(grad_y))
    return magnitude.mean()

def calculate_detail_score(image):
    """Return the variance of the first (L) channel of the image in LAB space.

    Args:
        image: BGR image array (as produced by ``cv2.imread``).

    Returns:
        The variance of the L channel, computed with
        ``scipy.ndimage.variance``.
    """
    lightness = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)[..., 0]
    return variance(lightness)


In [None]:
# Function to extract features from an image
def extract_features(image_path):
    """Read an image from disk and compute its five quality features.

    Args:
        image_path: Path to an image file readable by ``cv2.imread``.

    Returns:
        list: ``[exposure, focus, composition, clarity, detail]`` in that
        order, one value from each ``calculate_*_score`` function.

    Raises:
        ValueError: If the file is missing, unreadable, or not a decodable
            image.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # without this check the first scorer fails with an opaque OpenCV
        # assertion that does not mention the offending file.
        raise ValueError(f"Could not read image file: {image_path}")

    scorers = (
        calculate_exposure_score,
        calculate_focus_score,
        calculate_composition_score,
        calculate_clarity_score,
        calculate_detail_score,
    )
    return [score(image) for score in scorers]

# Function to load data by traversing all directories and subdirectories
def load_data_from_folders(main_folder_path):
    """Walk a dataset directory and build one labelled feature row per image.

    An image is labelled by the nearest enclosing folder whose name is exactly
    ``Good``, ``Bad`` or ``Decent``. Only the dataset root folder and the
    folders below it are considered, so a label word that merely appears
    inside another folder name (``Badlands``) or higher up the absolute path
    is never matched. Folders without a label ancestor are skipped.

    Directories and files are visited in sorted order so the returned rows,
    and therefore any seeded split made from them, are reproducible.

    Args:
        main_folder_path: Root folder of the dataset.

    Returns:
        list: One ``[exposure, focus, composition, clarity, detail, label]``
        list per readable ``.png`` / ``.jpg`` / ``.jpeg`` file.

    Side effects:
        Prints a warning for every image that could not be read (it is
        skipped) and for every label that has no folder in the dataset.
    """
    labels = ('Good', 'Bad', 'Decent')  # Sentence case labels
    image_extensions = ('.png', '.jpg', '.jpeg')
    data = []
    labels_found = set()

    # Path components are taken relative to the parent of the dataset root:
    # the root's own name still counts (so pointing directly at a label folder
    # works) but nothing above it can influence the label.
    base = os.path.dirname(os.path.normpath(main_folder_path))

    # Walk through the main folder and its subfolders
    for root, dirs, files in os.walk(main_folder_path):
        dirs.sort()  # in-place sort makes os.walk descend in a fixed order

        parts = os.path.relpath(root, base).split(os.sep)
        label = next((part for part in reversed(parts) if part in labels), None)
        if label is None:
            continue  # Skip any folder that isn't under Good, Bad, or Decent
        labels_found.add(label)

        # Extract features from all images in this folder
        for file in sorted(files):
            if not file.lower().endswith(image_extensions):
                continue
            image_path = os.path.join(root, file)
            try:
                features = extract_features(image_path)
            except (cv2.error, ValueError) as exc:
                # One corrupt file should not abort a long dataset load.
                print(f"Warning: Skipping unreadable image {image_path}: {exc}")
                continue
            features.append(label)  # Append the label ("Good", "Bad", or "Decent")
            data.append(features)

    # Check if all folders (Good, Bad, Decent) were found in the structure
    for folder in labels:
        if folder not in labels_found:
            print(f"Warning: No {folder} folder found in the dataset.")

    return data

# Path to the main dataset folder on the mounted Google Drive (the
# /content/drive prefix is the mount point used by drive.mount above)
main_folder_path = '/content/drive/My Drive/Photo Culling Using Machine Learning Dataset'

# Load data by traversing all subdirectories; each returned row holds the five
# feature scores followed by the label taken from the folder name
all_data = load_data_from_folders(main_folder_path)


In [None]:
# Abort early when the loader returned no rows at all
if not all_data:
    raise ValueError("No images were found. Please check your folder paths and ensure that the images are correctly placed.")

# Create a DataFrame: five feature columns followed by the class label
columns = ['Exposure', 'Focus', 'Composition', 'Clarity', 'Detail', 'Label']
df = pd.DataFrame(all_data, columns=columns)

# Print the number of images in each category for debugging
print(f"Number of images in each category:\n{df['Label'].value_counts()}")

# Step 2: Prepare features (X) and labels (y)
feature_columns = columns[:-1]  # every column except the trailing 'Label'
X = df[feature_columns]
y = df['Label']

# Print the shape of the dataset for further debugging
print(f"Features shape: {X.shape}, Labels shape: {y.shape}")

# Step 3: Split data into training and testing sets (80% / 20%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Step 4: Train a Random Forest classifier (fit returns the fitted estimator)
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 5: Test the model on the held-out split
y_pred = clf.predict(X_test)

# Step 6: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Output the results
print("Initial Accuracy:", accuracy)
print("Initial Classification Report:\n", classification_rep)

# Step 7: Save the classification report to a file
report_path = '/content/drive/My Drive/classification_report.txt'
report_lines = [
    f"Accuracy: {accuracy}\n",
    "Classification Report:\n",
    classification_rep,
]
with open(report_path, 'w') as report_file:
    report_file.writelines(report_lines)

print(f"Initial classification report saved to {report_path}")


In [None]:
# Cross-validation to assess model performance: 5-fold scoring of clf over the
# full feature table (X, y) rather than only the training split
scores = cross_val_score(clf, X, y, cv=5)
print(f"Cross-validation scores: {scores}")
print(f"Mean cross-validation accuracy: {scores.mean()}")


In [None]:
# Step 8: Misclassification Feedback Loop (Retraining with Export)
def feedback_loop_with_export(X_test, y_test, y_pred, X_train, y_train, clf,
                              iterations=10, random_state=42,
                              output_dir='/content/drive/My Drive'):
    """
    Feedback loop to retrain on misclassified examples.
    Adds misclassified examples back into the training set and retrains the model.
    Exports the classification results to an Excel file after each iteration.

    NOTE: the rows fed back into training come from X_test and the model is
    then re-scored on that same X_test, so the accuracy printed by this loop
    is optimistic and is not an estimate of performance on unseen images.

    Args:
        X_test: DataFrame of test features.
        y_test: Series of true test labels, aligned row-for-row with X_test.
        y_pred: Current predictions for X_test (array-like, same length).
        X_train: DataFrame of training features; the caller's object is not
            modified, an extended copy is built locally.
        y_train: Series of training labels; likewise not modified.
        clf: Estimator exposing ``fit`` and ``predict``. It is refitted in
            place, so the caller's model changes.
        iterations: Maximum number of retraining rounds.
        random_state: Seed for choosing which misclassified rows are fed back,
            making a run repeatable. Pass ``None`` for unseeded sampling.
        output_dir: Directory that receives one
            ``image_classification_results_iteration_<n>.xlsx`` file per round.

    Returns:
        None. Results are reported through printed output and Excel files.
    """
    # One generator for the whole run: each round draws a different sample,
    # yet the complete sequence is repeatable for a given seed.
    rng = np.random.RandomState(random_state)
    true_labels = y_test.to_numpy()

    for round_number in range(1, iterations + 1):
        # Identify misclassified examples (positions within the test set)
        misclassified_positions = np.flatnonzero(true_labels != np.asarray(y_pred))

        if misclassified_positions.size == 0:
            print("No more misclassifications. Training complete.")
            break

        print(f"Iteration {round_number}: {len(misclassified_positions)} misclassified examples found.")

        # Add misclassified examples back into the training set
        X_misclassified = X_test.iloc[misclassified_positions]
        y_misclassified = y_test.iloc[misclassified_positions]

        # Ensure that n_samples is at least 1
        n_samples_to_add = max(1, int(0.5 * len(X_misclassified)))

        # Sample misclassified examples to prevent overfitting
        X_misclassified_sample, y_misclassified_sample = resample(
            X_misclassified,
            y_misclassified,
            replace=False,
            n_samples=n_samples_to_add,
            random_state=rng,
        )

        X_train = pd.concat([X_train, X_misclassified_sample])
        y_train = pd.concat([y_train, y_misclassified_sample])

        # Retrain the model on the updated training set
        clf.fit(X_train, y_train)

        # Test the model again
        y_pred = clf.predict(X_test)

        # Recalculate accuracy and classification report
        accuracy = accuracy_score(y_test, y_pred)
        classification_rep = classification_report(y_test, y_pred)

        print(f"Iteration {round_number} Accuracy: {accuracy}")
        print(f"Iteration {round_number} Classification Report:\n", classification_rep)

        # Save each iteration's classification results as an Excel file
        results_df = X_test.copy()  # Copy X_test to retain the feature information
        results_df['True Label'] = y_test.values  # Add true labels
        results_df['Predicted Label'] = y_pred  # Add predicted labels

        # Export the classification results to an Excel file
        excel_output_path = os.path.join(
            output_dir,
            f'image_classification_results_iteration_{round_number}.xlsx',
        )
        results_df.to_excel(excel_output_path, index=False)

        print(f"Iteration {round_number} classification results saved to {excel_output_path}")

# Run the feedback loop with its default limit of 10 iterations; it stops
# earlier if a round finds no misclassified test rows
feedback_loop_with_export(X_test, y_test, y_pred, X_train, y_train, clf)
