In [6]:
import pandas as pd
import cv2
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
from deepface import DeepFace
from docx import Document
from docx.shared import Inches
import os
import warnings
import dlib
from sklearn.metrics.pairwise import cosine_similarity

# Silence every warning category for the rest of the run
warnings.filterwarnings('ignore')

# Read the spreadsheet and keep only the two columns used downstream
file_path = 'Assignment Data.xlsx'
df = pd.read_excel(file_path, sheet_name='Sheet1').loc[:, ['Video URL', 'Performance']]

# Detect faces in frames
_face_detector = None  # dlib detector, created on first use and shared across calls


def detect_faces(frame):
    """Return the cropped face regions found in an RGB frame.

    Args:
        frame: Image as a NumPy array in RGB channel order (it is converted
            to grayscale with ``cv2.COLOR_RGB2GRAY`` before detection).

    Returns:
        A list of sub-arrays of ``frame``, one per detected face. Detections
        whose box has no area inside the frame are dropped.
    """
    global _face_detector
    if _face_detector is None:
        # Building the detector is loop-invariant work; do it once.
        _face_detector = dlib.get_frontal_face_detector()

    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    frame_height, frame_width = frame.shape[:2]

    crops = []
    for rect in _face_detector(gray):
        # The detector may return boxes that extend past the image border.
        # A negative start index would wrap around when slicing and yield an
        # empty or wrong crop, so clamp every edge to the frame.
        top = max(rect.top(), 0)
        left = max(rect.left(), 0)
        bottom = min(rect.bottom(), frame_height)
        right = min(rect.right(), frame_width)
        if bottom > top and right > left:
            crops.append(frame[top:bottom, left:right])
    return crops

# Accumulators filled while scanning videos: one embedding and one metadata
# record per detected face
face_embeddings, face_metadata = [], []

# Make sure the output folder for representative face images exists
os.makedirs('cluster_faces', exist_ok=True)

# Cosine similarity between two embedding vectors
def get_cosine_similarity(embedding1, embedding2):
    """Return the cosine similarity of two embedding vectors as a scalar."""
    # cosine_similarity operates on 2-D inputs, so each vector is wrapped as
    # a single-row batch and the lone entry of the 1x1 result is returned.
    similarity_matrix = cosine_similarity([embedding1], [embedding2])
    return similarity_matrix[0][0]

# Process videos: sample frames from every video, detect faces, and record an
# embedding plus dominant emotion for each face alongside the video's performance.
for index, row in df.iterrows():
    video_url = row['Video URL']
    performance = row['Performance']

    # Empty spreadsheet cells are read as NaN, which VideoCapture cannot open.
    if not isinstance(video_url, str) or not video_url.strip():
        print(f"Skipping inaccessible video: {video_url}")
        continue

    video_capture = cv2.VideoCapture(video_url)
    try:
        if not video_capture.isOpened():
            print(f"Skipping inaccessible video: {video_url}")
            continue

        frame_count = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0:
            print(f"Skipping empty video: {video_url}")
            continue

        # Aim for roughly 50 evenly spaced frames per video
        step = max(frame_count // 50, 1)
        for frame_num in range(0, frame_count, step):
            video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_num)
            success, frame = video_capture.read()
            if not success or frame is None:
                continue

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            faces = detect_faces(rgb_frame)

            if not faces:
                print(f"No faces detected in frame {frame_num} of video: {video_url}")
                continue

            for face in faces:
                # A zero-area crop cannot be embedded or saved later on.
                if face.size == 0:
                    continue

                # DeepFace documents NumPy input as BGR, while the crops come
                # from the RGB frame; convert so the channels are not swapped.
                face_bgr = cv2.cvtColor(face, cv2.COLOR_RGB2BGR)

                try:
                    # The crop is already a detected face, so skip DeepFace's
                    # own detection pass instead of re-detecting inside it.
                    embeddings = DeepFace.represent(face_bgr, model_name='Facenet',
                                                    enforce_detection=False,
                                                    detector_backend='skip')
                    if not embeddings:
                        continue

                    # Analyze the emotion
                    emotion_analysis = DeepFace.analyze(face_bgr, actions=['emotion'],
                                                        enforce_detection=False,
                                                        detector_backend='skip')
                    dominant_emotion = emotion_analysis[0]['dominant_emotion']
                except ValueError as e:
                    print(f"Failed to extract embedding: {e}")
                    continue

                # Record the embedding together with the video's performance
                # and the RGB crop (converted back to BGR when it is written).
                embedding = embeddings[0]['embedding']
                face_embeddings.append(embedding)
                face_metadata.append({'video_url': video_url, 'performance': performance,
                                      'face_image': face, 'emotion': dominant_emotion,
                                      'embedding': embedding})
    finally:
        # Release the capture on every path, including the early `continue`s
        video_capture.release()

# Clustering step: group face embeddings, keep one representative per distinct
# face, average video performance per kept cluster, and write a Word report.
if face_embeddings:
    face_embeddings = np.array(face_embeddings)
    n_components = min(5, len(face_embeddings), face_embeddings.shape[1])
    pca = PCA(n_components=n_components)
    reduced_embeddings = pca.fit_transform(face_embeddings)

    # NOTE(review): eps is measured in un-normalised PCA coordinates; whether
    # 0.3 suits the scale of these embeddings is not verified here - TODO confirm.
    dbscan = DBSCAN(eps=0.3, min_samples=2).fit(reduced_embeddings)
    # Despite its name (kept unchanged), this holds one label per face; -1 is noise.
    unique_labels = dbscan.labels_

    # Combine clustering results with metadata
    metadata_df = pd.DataFrame(face_metadata)
    metadata_df['cluster'] = unique_labels

    # Eliminate duplicate clusters based on cosine similarity between embeddings.
    # dict.fromkeys visits each distinct label once, in order of first appearance.
    unique_faces = {}
    for cluster in dict.fromkeys(unique_labels.tolist()):
        # The first face of the cluster (noise label -1 included) represents it
        representative_face = metadata_df[metadata_df['cluster'] == cluster].iloc[0]

        # A cluster is a duplicate when its representative is close to a kept one
        is_duplicate = any(
            get_cosine_similarity(representative_face['embedding'], kept_face['embedding']) > 0.8
            for kept_face in unique_faces.values()
        )
        if not is_duplicate:
            unique_faces[cluster] = representative_face

    # Create a DataFrame for unique faces (one representative row per kept cluster)
    unique_faces_df = pd.DataFrame(unique_faces).T

    # Calculate average performance for each kept cluster from ALL of its member
    # faces. Averaging the representative rows alone would just echo the first
    # face's value. Performance is a per-video figure, so each video counts once
    # per cluster regardless of how many sampled frames showed the face.
    kept_members = metadata_df[metadata_df['cluster'].isin(list(unique_faces))]
    per_video = kept_members.drop_duplicates(subset=['cluster', 'video_url'])
    performance_stats = per_video.groupby('cluster').agg(
        avg_performance=('performance', 'mean')
    ).reset_index()

    # Sort clusters by descending average performance
    performance_stats = performance_stats.sort_values(by='avg_performance', ascending=False)

    # Save results in a Word document
    document = Document()
    document.add_heading('Cluster Performance Report', 0)

    table = document.add_table(rows=1, cols=4)
    hdr_cells = table.rows[0].cells
    hdr_cells[0].text = 'Cluster Number'
    hdr_cells[1].text = 'Average Performance'
    hdr_cells[2].text = 'Face Image'
    hdr_cells[3].text = 'Dominant Emotion'

    # itertuples keeps the integer cluster id; iterrows would upcast it to float
    for stats in performance_stats.itertuples(index=False):
        cluster = int(stats.cluster)
        avg_performance = stats.avg_performance
        representative_face = unique_faces[cluster]

        # Save the representative face; crops are stored as RGB, imwrite wants BGR
        face_filename = f'cluster_faces/cluster_{cluster}_face.jpg'
        image_saved = cv2.imwrite(
            face_filename, cv2.cvtColor(representative_face['face_image'], cv2.COLOR_RGB2BGR)
        )

        # Add data to the table
        row_cells = table.add_row().cells
        row_cells[0].text = str(cluster)
        row_cells[1].text = f'{avg_performance:.2f}'
        if image_saved:
            run = row_cells[2].paragraphs[0].add_run()
            run.add_picture(face_filename, width=Inches(1.5))
        else:
            # imwrite reports failure via its return value; avoid embedding a missing file
            row_cells[2].text = 'Image unavailable'
        row_cells[3].text = str(representative_face['emotion'])

    # Save the Word document
    document.save('cluster_performance_report.docx')
    print("Results saved to 'cluster_performance_report.docx'")

else:
    print("No faces detected.")


No faces detected in frame 0 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-989930303148492
No faces detected in frame 11 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-989930303148492
No faces detected in frame 0 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-975523146942238
No faces detected in frame 10 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-975523146942238
24-11-30 12:44:14 - facenet_weights.h5 will be downloaded...


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/facenet_weights.h5
To: /root/.deepface/weights/facenet_weights.h5
100%|██████████| 92.2M/92.2M [00:00<00:00, 120MB/s]


24-11-30 12:44:19 - facial_expression_model_weights.h5 will be downloaded...


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/facial_expression_model_weights.h5
To: /root/.deepface/weights/facial_expression_model_weights.h5
100%|██████████| 5.98M/5.98M [00:00<00:00, 100MB/s]


No faces detected in frame 0 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-960884234555681
No faces detected in frame 7 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-960884234555681
No faces detected in frame 14 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-960884234555681
No faces detected in frame 21 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-960884234555681
No faces detected in frame 0 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-944981830978756
No faces detected in frame 7 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-944981830978756
No faces detected in frame 0 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-932296771464171
No faces detected in frame 10 of video: https://fgimagestorage.blob.core.windows.net/facebook-assets/hd-932296771464171
No faces detected in frame 0 of video: https: