In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt
import logging


In [14]:
import torch
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
import os
import shutil
from collections import defaultdict
import numpy as np
import base64
import pandas as pd  # For Excel export

# Choose the compute device: the first CUDA GPU when one is available, else the CPU
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Face detection model, configured with keep_all=True and bound to the chosen device
mtcnn = MTCNN(device=device, keep_all=True)

# Face recognition model with the 'vggface2' pretrained weights, put into
# evaluation mode and then moved onto the chosen device
resnet = InceptionResnetV1(pretrained='vggface2')
resnet.eval()
resnet = resnet.to(device)

# Function to detect faces in an image
def detect_faces(image_path):
    """Report whether the face detector finds at least one face in an image.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        True when the detector returns one or more faces. False when it
        returns nothing, or when opening/processing the file raises; in that
        case the error is printed rather than propagated.
    """
    print(f"Detecting faces in {image_path}")
    try:
        # The context manager releases the file handle as soon as detection is
        # done, so the caller can move the file afterwards. convert('RGB')
        # gives the detector 3-channel input whatever the source mode was
        # (RGBA, palette, grayscale, ...).
        with Image.open(image_path) as img:
            faces = mtcnn(img.convert('RGB'))
        return faces is not None and len(faces) > 0
    except Exception as e:
        print(f"Error processing {image_path}: {str(e)}")
        return False

# Function to move images with detected faces to a new folder
def sort_images(source_folder, destination_folder):
    """Move every image in `source_folder` that contains a face to `destination_folder`.

    Only direct children of `source_folder` whose names end in .png, .jpg or
    .jpeg (case-insensitive) are examined. Each candidate is passed to
    `detect_faces`; files for which it returns True are moved, all others are
    left where they are.

    Args:
        source_folder: Directory to scan (not recursive).
        destination_folder: Directory that receives the images with faces.
            It is created, including missing parents, if it does not exist.

    Returns:
        None. Progress is reported with `print`.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    print(f"Sorting images from {source_folder} to {destination_folder}")
    # exist_ok=True replaces the check-then-create pair and is a no-op when
    # the directory is already there.
    os.makedirs(destination_folder, exist_ok=True)

    for filename in os.listdir(source_folder):
        if not filename.lower().endswith(image_suffixes):
            continue
        image_path = os.path.join(source_folder, filename)
        # A sub-directory can carry an image-like name; never hand it to the detector.
        if not os.path.isfile(image_path):
            continue
        if detect_faces(image_path):
            shutil.move(image_path, os.path.join(destination_folder, filename))
            print(f"Moved {filename} to {destination_folder}")

# Function to recognize and cluster faces based on similarity
def recognize_faces(folder_path, similarity_threshold=0.50):  # Lower threshold for fewer clusters
    """Greedily cluster the faces found in a folder of images.

    Every .png/.jpg/.jpeg file directly inside `folder_path` is run through
    the face detector; each detected face is embedded and compared, by cosine
    similarity, with the reference embedding of every existing cluster. A face
    joins the most similar cluster when that similarity exceeds
    `similarity_threshold`; otherwise it founds a new cluster and its
    embedding becomes that cluster's reference. Clusters are labelled
    "Person_1", "Person_2", ... in order of creation.

    Files are visited in sorted name order because the greedy assignment is
    order-dependent; sorting makes the cluster numbering reproducible.

    Args:
        folder_path: Directory containing the images (not recursive).
        similarity_threshold: Cosine similarity a face must exceed to be
            merged into an existing cluster.

    Returns:
        (face_counts, face_images): two defaultdicts keyed by person label.
        `face_counts` maps label -> number of faces assigned to it;
        `face_images` maps label -> list of image paths, one entry per face.

    Errors raised while processing a file are printed and that file is
    skipped; they are not propagated.
    """
    print(f"Recognizing faces in {folder_path}")
    face_counts = defaultdict(int)
    face_images = defaultdict(list)  # Store all images for each person
    known_embeddings = []  # unit-length reference per cluster; index == cluster id - 1

    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        image_path = os.path.join(folder_path, filename)
        try:
            # Close the file promptly and give the detector 3-channel input.
            with Image.open(image_path) as img:
                faces = mtcnn(img.convert('RGB'))
            if faces is None:
                continue

            # Inference only: skip building an autograd graph.
            with torch.no_grad():
                embeddings = resnet(faces.to(device)).cpu().numpy()

            for embedding in embeddings:
                cluster_id = _assign_cluster(embedding, known_embeddings, similarity_threshold)
                person = f"Person_{cluster_id}"
                face_counts[person] += 1
                face_images[person].append(image_path)
        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")

    return face_counts, face_images


def _assign_cluster(embedding, known_embeddings, similarity_threshold):
    """Return the 1-based cluster id for `embedding`, appending a new reference to `known_embeddings` when nothing matches."""
    norm = np.linalg.norm(embedding)
    # Store unit vectors so cosine similarity reduces to a dot product.
    unit = embedding / norm if norm > 0 else embedding

    if known_embeddings:
        similarities = np.stack(known_embeddings) @ unit
        best = int(np.argmax(similarities))  # first maximum, like list.index(max(...))
        if similarities[best] > similarity_threshold:
            return best + 1

    known_embeddings.append(unit)
    return len(known_embeddings)

# Function to generate an HTML file for viewing the face clusters and their associated images
def generate_html(face_counts, face_images, output_html):
    """Write an HTML report with one heading and one sample image per person.

    People are listed in ascending order of the number after the underscore
    in their label ("Person_1", "Person_2", ...). For each person the first
    path in `face_images[person]` is embedded in the page as a base64 data
    URI, so the report has no external file references.

    Args:
        face_counts: Mapping of person label -> occurrence count.
        face_images: Mapping of person label -> list of image paths.
        output_html: Path of the HTML file to write (overwritten if present).

    Returns:
        None.

    Raises:
        OSError: If a sample image cannot be read or the report cannot be written.
        ValueError / IndexError: If a label is not of the form "<text>_<int>".
    """
    # MIME type for the data URI, chosen by extension; anything unrecognised
    # falls back to the JPEG type that was previously used for every image.
    mime_by_suffix = {'.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg'}

    print(f"Generating HTML report in {output_html}")
    parts = ["<html><body><h1>Face Clustering Report</h1>"]
    for person, count in sorted(face_counts.items(), key=lambda x: int(x[0].split('_')[1])):
        parts.append(f"<h2>{person}: {count} occurrences</h2>")

        images = face_images.get(person) or []
        if not images:
            # Counted person without a recorded image: keep the heading, skip the picture.
            continue

        image_path = images[0]  # Use the first image for each person
        suffix = os.path.splitext(image_path)[1].lower()
        mime_type = mime_by_suffix.get(suffix, 'image/jpeg')
        with open(image_path, "rb") as img_file:
            b64_string = base64.b64encode(img_file.read()).decode('utf-8')
        parts.append(f'<img src="data:{mime_type};base64,{b64_string}" style="width:200px;height:auto;"/><br><br>')

    parts.append("</body></html>")

    # Explicit encoding so the output does not depend on the platform default.
    with open(output_html, "w", encoding="utf-8") as file:
        file.write("".join(parts))

# Function to generate an Excel file with pictures tagged to each person
def generate_excel(face_counts, face_images, output_excel):
    """Write an Excel sheet with one row per (person, image) pair.

    Rows are ordered by the number after the underscore in the person label.
    Each row holds the label, that person's total count, and a HYPERLINK
    formula pointing at the image path.

    Args:
        face_counts: Mapping of person label -> occurrence count.
        face_images: Mapping of person label -> list of image paths.
        output_excel: Path of the workbook to write.

    Returns:
        None. A confirmation line is printed once the file is written.
    """
    columns = ['Person', 'Image Count', 'Image']

    data = []
    for person, count in sorted(face_counts.items(), key=lambda x: int(x[0].split('_')[1])):
        # .get works for plain dicts and defaultdicts alike and never inserts keys.
        for img in face_images.get(person, []):
            # A double quote inside a formula string literal is written doubled.
            link_target = str(img).replace('"', '""')
            data.append({
                'Person': person,
                'Image Count': count,
                'Image': f'=HYPERLINK("{link_target}", "View Image")'
            })

    # Passing the columns explicitly keeps the header row even when there are no rows.
    df = pd.DataFrame(data, columns=columns)

    # Write to Excel
    df.to_excel(output_excel, index=False)
    print(f"Excel report generated at {output_excel}")

# Usage
# Every input and output lives under one base directory, so a different
# picture collection only needs this single line changed.
base_folder = 'C:/Users/nrajyaguru/python files/CV Friend/Pics'
source_folder = base_folder
destination_folder = f'{base_folder}/test'
output_html = f'{base_folder}/report.html'
output_excel = f'{base_folder}/report.xlsx'

# Set to True to also write the HTML report (off by default)
write_html_report = False

# Sort images into a new folder
sort_images(source_folder, destination_folder)

# Recognize faces and cluster them
face_counts, face_images = recognize_faces(destination_folder)

# Print the results
print("Unique faces and their occurrences:")
for name, count in sorted(face_counts.items(), key=lambda x: int(x[0].split('_')[1])):
    print(f"{name}: {count}")

print(f"Total unique faces: {len(face_counts)}")

# Generate an HTML report with the face clusters and images
if write_html_report:
    generate_html(face_counts, face_images, output_html)
    print(f"HTML report generated at {output_html}")

# Generate an Excel report listing each person and their images.
# generate_excel prints its own confirmation line, so none is repeated here.
generate_excel(face_counts, face_images, output_excel)


Sorting images from C:/Users/nrajyaguru/python files/CV Friend/Pics to C:/Users/nrajyaguru/python files/CV Friend/Pics/test
Detecting faces in C:/Users/nrajyaguru/python files/CV Friend/Pics\0392862f7302293b7fabe8b0cd8ef868.0.jpg
Moved 0392862f7302293b7fabe8b0cd8ef868.0.jpg to C:/Users/nrajyaguru/python files/CV Friend/Pics/test
Detecting faces in C:/Users/nrajyaguru/python files/CV Friend/Pics\135060513_756232958314519_7566079181329053147_n.jpg
Moved 135060513_756232958314519_7566079181329053147_n.jpg to C:/Users/nrajyaguru/python files/CV Friend/Pics/test
Detecting faces in C:/Users/nrajyaguru/python files/CV Friend/Pics\20200215_181959.jpg
Moved 20200215_181959.jpg to C:/Users/nrajyaguru/python files/CV Friend/Pics/test
Detecting faces in C:/Users/nrajyaguru/python files/CV Friend/Pics\20220114_213308.jpg
Moved 20220114_213308.jpg to C:/Users/nrajyaguru/python files/CV Friend/Pics/test
Detecting faces in C:/Users/nrajyaguru/python files/CV Friend/Pics\20220114_213321.jpg
Moved 20220

Excel report generated at C:/Users/nrajyaguru/python files/CV Friend/Pics/report.xlsx
