In [1]:
import pandas as pd

# Read the assignment spreadsheet and relabel its columns in a single chained
# expression; set_axis raises if the sheet does not have exactly two columns.
df = pd.read_excel('Assignment Data.xlsx').set_axis(['Performance', 'VideoURL'], axis=1)


In [2]:
# Plain-text dump of the loaded frame; pandas elides the middle rows of a long frame.
print(df)

     Performance                                           VideoURL
0       1.106000  https://fgimagestorage.blob.core.windows.net/f...
1       2.244700  https://fgimagestorage.blob.core.windows.net/f...
2       2.012600  https://fgimagestorage.blob.core.windows.net/f...
3       1.770800  https://fgimagestorage.blob.core.windows.net/f...
4       0.629300  https://fgimagestorage.blob.core.windows.net/f...
..           ...                                                ...
263     1.560931  https://fgimagestorage.blob.core.windows.net/f...
264     0.948489  https://fgimagestorage.blob.core.windows.net/f...
265     1.274918  https://fgimagestorage.blob.core.windows.net/f...
266     0.156167  https://fgimagestorage.blob.core.windows.net/f...
267     1.444243  https://fgimagestorage.blob.core.windows.net/f...

[268 rows x 2 columns]


In [None]:
!pip install opencv-python opencv-python-headless numpy scikit-learn tensorflow mtcnn


In [None]:
import cv2
import os

def extract_frames(video_path, output_dir, interval=30):
    """
    Extract frames from a video at a given interval.

    Frame ``n`` (0-based) is written to ``<output_dir>/frame_<n>.jpg`` whenever
    ``n`` is a multiple of ``interval``, so the first frame is always selected.

    Args:
        video_path (str): Path to the video file.
        output_dir (str): Directory to save extracted frames (created if missing).
        interval (int): Frame interval for extraction (default is every 30 frames).
            Must be at least 1.
    Returns:
        int: Number of frame images that were written successfully.
    Raises:
        ValueError: If ``interval`` is smaller than 1.
        IOError: If the video cannot be opened.
    """
    # Validate first so a bad call has no side effects (no directory, no capture handle).
    if interval < 1:
        raise ValueError(f"interval must be >= 1, got {interval!r}")

    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    saved = 0
    try:
        # An unopenable source would otherwise just yield zero frames without any hint.
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path!r}")

        frame_count = 0
        success, frame = cap.read()
        while success:
            if frame_count % interval == 0:
                frame_filename = os.path.join(output_dir, f"frame_{frame_count}.jpg")
                # imwrite reports failure through its return value, not an exception.
                if cv2.imwrite(frame_filename, frame):
                    saved += 1
            success, frame = cap.read()
            frame_count += 1
    finally:
        # Release the capture handle even if reading or writing raised.
        cap.release()

    return saved

# Demo run: save every 30th frame of the sample clip under ./output_frames
extract_frames(video_path="sample_video.mp4", output_dir="output_frames", interval=30)


In [None]:
from mtcnn import MTCNN
import cv2

# One shared detector instance; detect_faces() below reads this module-level name.
detector = MTCNN()

def detect_faces(image_path):
    """
    Detect faces in an image using MTCNN.

    Args:
        image_path (str): Path to the image file.
    Returns:
        List of cropped face images (RGB NumPy arrays), one per detected box that
        overlaps the image. The list is empty when nothing is detected.
    Raises:
        IOError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file is missing or unreadable.
    if image is None:
        raise IOError(f"Could not read image: {image_path!r}")

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_height, img_width = image_rgb.shape[:2]
    cropped_faces = []

    for face in detector.detect_faces(image_rgb):
        x, y, width, height = face['box']
        # A box can start outside the image; a negative start index would make NumPy
        # slice from the opposite edge, so clamp the box to the image bounds first.
        x1, y1 = max(x, 0), max(y, 0)
        x2, y2 = min(x + width, img_width), min(y + height, img_height)
        if x2 <= x1 or y2 <= y1:
            continue  # no part of the box lies inside the image
        cropped_faces.append(image_rgb[y1:y2, x1:x2])

    return cropped_faces

# Example usage
# frame_0.jpg is the first file extract_frames() targets: frame index 0 is a
# multiple of every interval.
faces = detect_faces("output_frames/frame_0.jpg")


In [None]:
from tensorflow.keras.models import load_model
import numpy as np

# Load FaceNet model
# The relative path is resolved against the current working directory.
# generate_embedding() below reads this module-level name.
facenet_model = load_model('facenet_keras.h5')  # Download model from a trusted source

def preprocess_face(face_image, target_size=(160, 160)):
    """
    Preprocess the face image for FaceNet.

    The image is resized to ``target_size``, converted to float32 and divided by 255,
    standardised with the mean and standard deviation taken over all of its values,
    and finally given a leading batch axis.

    Args:
        face_image (numpy array): Cropped face image.
        target_size (tuple): Target size for resizing (default is (160, 160)).
    Returns:
        Preprocessed face image as a NumPy array with one extra leading axis.
    Raises:
        ValueError: If ``face_image`` is None or contains no pixels.
    """
    # Fail early with a clear message instead of an opaque OpenCV assertion.
    if face_image is None or face_image.size == 0:
        raise ValueError("face_image is empty; nothing to preprocess")

    face_image = cv2.resize(face_image, target_size)
    face_image = face_image.astype('float32') / 255.0
    mean, std = face_image.mean(), face_image.std()
    # A perfectly uniform crop has std == 0, and dividing by it would fill the
    # result with NaN. Flooring the divisor leaves such a crop as all zeros.
    std = max(std, 1e-7)
    face_image = (face_image - mean) / std
    return np.expand_dims(face_image, axis=0)

def generate_embedding(face_image):
    """
    Turn one cropped face into its FaceNet embedding vector.

    The face is run through preprocess_face() and the resulting single-image batch
    is passed to the module-level ``facenet_model``.

    Args:
        face_image (numpy array): Cropped face image.
    Returns:
        The first row of the model's prediction as a NumPy array (128-dimensional
        according to the original author's note; the size depends on the loaded model).
    """
    return facenet_model.predict(preprocess_face(face_image))[0]

# Example usage
# `faces` is empty when nothing was detected in the frame, so guard the index
# instead of letting the cell die with an IndexError.
embedding = generate_embedding(faces[0]) if len(faces) > 0 else None


In [None]:
from sklearn.cluster import DBSCAN

def cluster_faces(embeddings, eps=0.5, min_samples=2):
    """
    Cluster face embeddings using DBSCAN.

    Args:
        embeddings (list): List of face embeddings.
        eps (float): Maximum distance between samples for clustering (default 0.5).
        min_samples (int): Minimum samples to form a cluster (default 2).
    Returns:
        NumPy array of cluster labels, one per embedding. An empty array is
        returned when ``embeddings`` is empty.
    """
    embeddings_array = np.asarray(embeddings)

    # DBSCAN rejects an empty sample matrix; no embeddings simply means no labels.
    if embeddings_array.size == 0:
        return np.empty(0, dtype=np.intp)

    # NOTE(review): eps is a Euclidean radius, so a sensible value depends on the
    # scale of the embeddings (e.g. whether they are L2-normalised) - confirm.
    db = DBSCAN(eps=eps, min_samples=min_samples, metric='euclidean')
    return db.fit_predict(embeddings_array)

# Demo: embed every detected face, cluster the embeddings, and show the labels
embeddings = list(map(generate_embedding, faces))
cluster_labels = cluster_faces(embeddings)
print(f"Cluster labels: {cluster_labels}")


In [3]:
# Placeholder records standing in for the output of face detection & mapping:
# one entry per influencer, each with a list of performance values.
influencer_data = [
    dict(Influencer="Influencer_1", Performance=[1.106, 2.344, 1.876]),
    dict(Influencer="Influencer_2", Performance=[1.500, 1.700]),
]

# Build the frame and attach the arithmetic mean of each list in a single chain.
df_influencer = pd.DataFrame(influencer_data).assign(
    AveragePerformance=lambda frame: frame['Performance'].map(
        lambda scores: sum(scores) / len(scores)
    )
)


In [4]:
# Plain-text dump of the per-influencer table, including the derived AveragePerformance column.
print(df_influencer)

     Influencer            Performance  AveragePerformance
0  Influencer_1  [1.106, 2.344, 1.876]            1.775333
1  Influencer_2             [1.5, 1.7]            1.600000


In [5]:
from html import escape

from IPython.display import display, HTML

# Mock data for display
# NOTE(review): these averages are hand-typed placeholders. Influencer_1's 1.75 does
# not match the 1.775333 computed in df_influencer above - reconcile before real use.
influencers = [{"Name": "Influencer_1", "Average Performance": 1.75, "Face": "path_to_image1"},
               {"Name": "Influencer_2", "Average Performance": 1.6, "Face": "path_to_image2"}]

# Escape every interpolated value so a name or path containing <, >, & or a quote
# cannot break the markup (quote=True also covers the single-quoted src attribute).
rows = [
    f"<tr><td>{escape(str(entry['Name']))}</td>"
    f"<td><img src='{escape(str(entry['Face']), quote=True)}' width=50></td>"
    f"<td>{escape(str(entry['Average Performance']))}</td></tr>"
    for entry in influencers
]

# Assemble the table once with join() instead of growing a string inside a loop.
html = (
    "<table><tr><th>Influencer</th><th>Face</th><th>Average Performance</th></tr>"
    + "".join(rows)
    + "</table>"
)

display(HTML(html))


Influencer,Face,Average Performance
Influencer_1,,1.75
Influencer_2,,1.6
