In [None]:
%pip install -U insightface onnxruntime-gpu  numpy fastai pillow

In [None]:
%pip install -U gdown moviepy ffmpeg insightface natsort hdbscan imutils 

In [None]:
!sudo apt install ffmpeg

In [None]:
import glob
import io
import os
import os.path as osp
import shutil

import cv2
import gdown
import insightface
import torch
from insightface.app import FaceAnalysis
from insightface.data import get_image as ins_get_image
from moviepy.editor import VideoFileClip,AudioFileClip
from torch import nn

In [None]:

# Notebook-wide configuration: where the input video comes from and where work files go.
drive_file_id = '1kz9-3Q-pn0VX5E8-0VWoJsNNwhTzZwCj'  # Google Drive ID of the input video
video_output = 'my_video.mp4'  # local file name the downloaded video is saved under
# Working directories: extracted frames, face-swapped frames, per-character montages.
frame_folder, swapped_folder, character_folder = (
    '/notebooks/frame_folder',
    '/notebooks/swapped',
    '/notebooks/character',
)


In [None]:
# Start from clean working directories. On a fresh environment neither folder exists
# yet, so a missing directory is skipped instead of raising FileNotFoundError.
for stale_dir in (frame_folder, swapped_folder):
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir)

In [None]:
import cv2
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

# Every face crop is turned into a fixed-size 64x64 tensor.
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

# NOTE(review): `face_data` is only created by the frame-scanning cell further down this
# notebook, so this cell fails on a fresh top-to-bottom run until that cell has been executed.

# One image tensor per detected face; OpenCV crops are BGR, so they are converted to RGB first.
images = torch.stack([
    transform(cv2.cvtColor(record['image'], cv2.COLOR_BGR2RGB))
    for record in face_data
])

# One row per detected face, taken from the 'face_embedding' field of each record.
embeddings = torch.stack([torch.Tensor(record['face_embedding']) for record in face_data])

# Create a custom dataset
class CustomDataset(Dataset):
    """Paired dataset: item ``i`` is the tuple ``(embeddings[i], images[i])``."""

    def __init__(self, embeddings, images):
        self.embeddings, self.images = embeddings, images

    def __len__(self):
        # The embeddings container defines how many samples there are.
        return len(self.embeddings)

    def __getitem__(self, idx):
        sample = (self.embeddings[idx], self.images[idx])
        return sample

# Serve shuffled mini-batches of 32 (embedding, image) pairs.
dataset = CustomDataset(embeddings=embeddings, images=images)
dataloader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)

In [None]:
class MultimodalAutoencoder(nn.Module):
    """Two autoencoders (face embedding and 64x64 face image) with 64-d latent codes.

    Both encoders produce a 64-dimensional code, so a code produced by one encoder can
    be fed to the other modality's decoder (e.g. embedding -> latent -> image).

    Args:
        embedding_dim: Length of one embedding vector. When omitted, it is read from
            the notebook-level ``embeddings`` tensor (``embeddings.shape[1]``), which
            keeps the original zero-argument construction working.
    """

    def __init__(self, embedding_dim=None):
        super().__init__()
        if embedding_dim is None:
            # Backward-compatible default: size taken from the global `embeddings`.
            embedding_dim = embeddings.shape[1]
        self.embedding_dim = embedding_dim

        # Encoder for embeddings: embedding_dim -> 128 -> 64
        self.embedding_encoder = nn.Sequential(
            nn.Linear(embedding_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True))

        # Encoder for images. For a 64x64 input the spatial size goes
        # 64 -> 60 (conv 5) -> 30 (pool) -> 26 (conv 5) -> 13 (pool), hence 64*13*13.
        self.image_encoder = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=5),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=5),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),
            nn.Linear(64*13*13, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True))

        # Decoder for embeddings: 64 -> 128 -> embedding_dim.
        # The output layer is deliberately linear: a trailing ReLU would clamp every
        # reconstructed component to >= 0, so negative embedding values could never be
        # reproduced. Layer indices 0 and 2 are unchanged, so previously saved
        # state_dicts still load.
        self.embedding_decoder = nn.Sequential(
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, embedding_dim))

        # Decoder for images: 64 -> (128, 8, 8) -> 16x16 -> 32x32 -> upsample to 64x64 -> 3 channels
        self.image_decoder = nn.Sequential(
            nn.Linear(64, 128*8*8),
            nn.ReLU(True),
            nn.Unflatten(1, (128, 8, 8)),
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(True),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
            nn.Conv2d(32, 3, kernel_size=1))

    def forward(self, x1, x2):
        """Reconstruct both inputs independently.

        Args:
            x1: Batch of embeddings, shape ``(N, embedding_dim)``.
            x2: Batch of images, shape ``(N, 3, 64, 64)``.

        Returns:
            Tuple ``(reconstructed_embeddings, reconstructed_images)``.
        """
        x1 = self.embedding_encoder(x1)
        x2 = self.image_encoder(x2)
        x1 = self.embedding_decoder(x1)
        x2 = self.image_decoder(x2)
        return x1, x2

# Initialize the autoencoder and optimizer
model = MultimodalAutoencoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)
# Fall back to the CPU when no GPU is available instead of failing on `.to("cuda")`.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Train the autoencoder
num_epochs = 1000
print(embeddings.shape)  # print shape of original embeddings

for epoch in range(num_epochs):
    # Batch variables get their own names: reusing `embeddings` / `images` here would
    # replace the full dataset tensors with the last mini-batch for every later cell.
    for batch_embeddings, batch_images in dataloader:
        batch_embeddings = batch_embeddings.to(device)
        batch_images = batch_images.to(device)
        embeddings_output, images_output = model(batch_embeddings, batch_images)
        # Total loss is the sum of the two reconstruction errors.
        loss_embeddings = criterion(embeddings_output, batch_embeddings)
        loss_images = criterion(images_output, batch_images)
        loss = loss_embeddings + loss_images
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Reports the loss of the last mini-batch of the epoch.
    print('epoch [{}/{}], loss:{:.4f}'.format(epoch+1, num_epochs, loss.item()))

In [None]:
# Persist the trained parameters, then restore them into a freshly constructed model.
weights_path = 'model_weights.pth'
torch.save(model.state_dict(), weights_path)

model = MultimodalAutoencoder()
restored_state = torch.load(weights_path)
model.load_state_dict(restored_state)
model.eval()  # evaluation mode; as the last expression it also echoes the module

In [None]:
# Cross-modal decoding: face embedding -> 64-d latent code -> reconstructed face image.
model = model.to(device)

# `Tensor.to` returns a new tensor rather than moving in place, so its result must be
# kept. A separate name is used so `embeddings` still holds the original vectors and
# this cell can be re-run.
embedding_batch = torch.stack(
    [torch.as_tensor(e, dtype=torch.float32) for e in embeddings]
).to(device)

# Inference only: no gradients are needed.
with torch.no_grad():
    latent_codes = model.embedding_encoder(embedding_batch)
    reconstructed_images = model.image_decoder(latent_codes)
reconstructed_images


In [None]:

import numpy as np
from PIL import Image

# Turn the decoder output (a tensor in NCHW layout) into displayable 8-bit images.
reconstructed_images_np = reconstructed_images.cpu().detach().numpy()
reconstructed_images_np = np.transpose(reconstructed_images_np, (0, 2, 3, 1))  # convert from NCHW to NHWC

# The image decoder ends in a plain convolution, so values may fall outside [0, 1].
# Clip before the uint8 cast, which does not saturate out-of-range values.
reconstructed_images_uint8 = (np.clip(reconstructed_images_np, 0.0, 1.0) * 255).astype(np.uint8)

reconstructed_image = Image.fromarray(reconstructed_images_uint8[3])
reconstructed_image.show()

# The autoencoder was trained on RGB crops, while `app.get` is given OpenCV (BGR)
# frames everywhere else in this notebook, so the channel order is flipped back.
faces = app.get(np.ascontiguousarray(reconstructed_images_uint8[0][:, :, ::-1]))
print(faces)

In [None]:
import re

# Compare the version numerically: as strings, '0.10' sorts before '0.7' and would
# wrongly fail the check.
_insightface_version = tuple(int(part) for part in re.findall(r'\d+', insightface.__version__)[:2])
assert _insightface_version >= (0, 7), f"insightface >= 0.7 is required, found {insightface.__version__}"

# Create the frame and swapped-frame folders if they don't exist
for working_dir in (frame_folder, swapped_folder):
    os.makedirs(working_dir, exist_ok=True)

# Face detector / embedder and the face-swapping model shared by the cells below
app = FaceAnalysis()
app.prepare(ctx_id=0, det_size=(640, 640))
swapper = insightface.model_zoo.get_model('/notebooks/inswapper_128.onnx')

In [None]:


import numpy as np

def get_first_face(image):
    """Return the left-most face detected in ``image``.

    Args:
        image: Either a file path (read with ``cv2.imread``) or an image already
            loaded as a numpy array.

    Returns:
        The detected face whose bounding box has the smallest left x coordinate.

    Raises:
        AssertionError: If the path cannot be read as an image, the argument is neither
            a path nor an array, or no face is detected. The exception type matches
            the former ``assert False`` so existing behaviour is preserved, but it is
            raised explicitly so it carries a message and is not stripped by ``-O``.
    """
    source = image
    # If the image is a string (presumably a file path), read the image
    if isinstance(image, str):
        image = cv2.imread(image)
        if image is None:
            raise AssertionError(f"Could not read an image from {source!r}")

    if not isinstance(image, np.ndarray):
        raise AssertionError(
            f"Expected a file path or a numpy image, got {type(image).__name__}"
        )

    # Order faces left to right by the x coordinate of their bounding box.
    source_faces = sorted(app.get(image), key=lambda x: x.bbox[0])
    if not source_faces:
        described = repr(source) if isinstance(source, str) else f"array of shape {image.shape}"
        raise AssertionError(f"No face detected in {described}")

    return source_faces[0]


In [None]:
import requests

url = "http://example.com/path_to_your_file"  # replace with your file's URL

# Stream the body to disk in chunks and fail on HTTP errors, so an error page is never
# silently saved as the video and a stalled server cannot hang the cell forever.
with requests.get(url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(video_output, 'wb') as f:  # replace with the path where you want to save the file
        for chunk in response.iter_content(chunk_size=1 << 20):
            f.write(chunk)

In [None]:
# Fetch the source video from Google Drive using its file ID.
url = 'https://drive.google.com/uc?id={}'.format(drive_file_id)
gdown.download(url, video_output, quiet=False)

In [None]:
# Remove the character folder (and everything in it) if a previous run left one behind.
character_folder_present = os.path.exists(character_folder)
if character_folder_present:
    shutil.rmtree(character_folder)

In [None]:
import cv2
import numpy as np
import insightface
from insightface.app import FaceAnalysis
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
import pickle
import datetime
# Make sure the output folders exist
os.makedirs(character_folder, exist_ok=True)
os.makedirs(frame_folder, exist_ok=True)

# The FaceAnalysis application `app` from the model-loading cell is reused here.
# To build a GPU-first instance instead:
#   app = FaceAnalysis(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
#   app.prepare(ctx_id=0, det_size=(640, 640))  # GPU device 0, detector input size 640x640

# Open the video file
cap = cv2.VideoCapture(video_output)
# Frame rate (frames per second)
fps = cap.get(cv2.CAP_PROP_FPS)
face_index = 0
# One record per detected face: embeddings, bounding box, crop and bookkeeping fields
face_data = []
count = 0
try:
    # Loop through the video file frame by frame
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Position reported by the capture, kept as a timedelta
        time_in_milliseconds = cap.get(cv2.CAP_PROP_POS_MSEC)
        frame_time = datetime.timedelta(milliseconds=time_in_milliseconds)
        path = os.path.join(frame_folder, f"{count}.jpg")
        cv2.imwrite(path, frame)  # save frame as JPEG file
        # Use the FaceAnalysis application to detect faces in the frame
        faces = app.get(frame)
        frame_height, frame_width = frame.shape[:2]

        for face in faces:
            face_embedding = face.embedding
            bbox = face.bbox.astype(int)
            # Clamp the crop to the frame: a negative coordinate would otherwise be
            # interpreted as "count from the end" by the slice and produce a wrong or
            # empty crop. The stored 'bbox' keeps the detector's original values.
            x1, y1 = max(bbox[0], 0), max(bbox[1], 0)
            x2, y2 = min(bbox[2], frame_width), min(bbox[3], frame_height)
            cropped_face = frame[y1:y2, x1:x2]
            if cropped_face.size > 0:
                face_data.append({'index':face_index,'label':'','frame':count,'time':frame_time,'face_embedding': face_embedding,'score':face.det_score,'normed_embedding': face.normed_embedding, 'bbox': bbox, 'image': cropped_face})
            # The index advances for every detection, including skipped empty crops.
            face_index += 1

        count += 1
finally:
    # Always release the capture handle, even if detection fails midway.
    cap.release()

# Save the face_data to disk
with open('face_data.pkl', 'wb') as f:
    pickle.dump(face_data, f)


In [None]:

def is_similar(face1, face2, threshold=0.5):
    """Tell whether two faces are more similar than ``threshold``.

    The similarity is the dot product of the two ``normed_embedding`` attributes,
    which equals the cosine similarity when those vectors are unit-length.

    Returns:
        True when the similarity is strictly greater than ``threshold``.
    """
    cosine = np.dot(face1.normed_embedding, face2.normed_embedding)
    return cosine > threshold

In [None]:
import pickle
class FaceDataManager:
    """Pickle-backed store of per-face records (a list of dicts kept in ``self.data``)."""

    def __init__(self, file_path):
        self.file_path = file_path
        self.load_data()

    def load_data(self):
        """Replace ``self.data`` with the content of the pickle file."""
        # NOTE(review): unpickling can execute arbitrary code; only open trusted files.
        with open(self.file_path, 'rb') as source:
            self.data = pickle.load(source)

    def save_data(self):
        """Write ``self.data`` back to the pickle file."""
        with open(self.file_path, 'wb') as sink:
            pickle.dump(self.data, sink)

    def update_label(self, frame_number, face_image, new_label, save=True):
        """Relabel the record(s) of ``frame_number`` whose crop equals ``face_image``.

        The file is rewritten afterwards unless ``save`` is false.
        """
        matching_records = (
            record for record in self.data
            if record['frame'] == frame_number and np.array_equal(record['image'], face_image)
        )
        for record in matching_records:
            record['label'] = new_label
        if save:
            self.save_data()

    def update_labels(self, old_label, new_label):
        """Rename ``old_label`` to ``new_label`` on every record, then save."""
        for record in self.data:
            if record['label'] != old_label:
                continue
            record['label'] = new_label
        self.save_data()
    
# Shared record store for the cells below, loaded from the pickle written by the
# frame-scanning cell.
face_data_manager = FaceDataManager(file_path='face_data.pkl')

In [None]:

# Remove the character folder (and everything in it) before re-clustering.
character_folder_present = os.path.exists(character_folder)
if character_folder_present:
    shutil.rmtree(character_folder)

In [None]:
from scipy.sparse.csgraph import connected_components
from scipy.sparse import csr_matrix
import os
import numpy as np
import cv2
import pickle
from imutils import build_montages
from sklearn.cluster import DBSCAN
from hdbscan import HDBSCAN
from sklearn.preprocessing import normalize
from sklearn.cluster import AgglomerativeClustering
from sklearn.manifold import MDS
from hdbscan import HDBSCAN

# Pairwise similarity between all stored faces: dot products of their normed embeddings.
feats = np.array(
    [record["normed_embedding"] for record in face_data_manager.data],
    dtype=np.float32,
)
sims = np.dot(feats, feats.T)
""" 
# define the similarity threshold
threshold = 0.7  # set your value

# create an adjacency matrix
adjacency = sims > threshold

# find connected components
n_components, labels = connected_components(csgraph=csr_matrix(adjacency), directed=False)
print(labels)
# print the number of groups
print(f'Total groups: {n_components}')
uniqueLabels = np.unique(labels)
print(uniqueLabels)
for label in uniqueLabels:
    # Skip the noise
    if label == -1:
        continue  

    directory_path = os.path.join(character_folder, str(label))
    if not os.path.exists(directory_path):
        os.makedirs(directory_path)

    # Select samples associated with the current label
    idxs = np.where(labels == label)[0]
    faces = []
    # loop over the sampled indexes
    for i in idxs:
        # Get the cropped face image from the 'image' key
        face = face_data_manager.data[i]["image"]
        frame = face_data_manager.data[i]["frame"]
        face_data_manager.data[i]['label'] = str(label)
        time_elapsed= face_data_manager.data[i]['time']
        # Calculate minutes and seconds
        minutes = time_elapsed.total_seconds() // 60
        seconds = time_elapsed.total_seconds() % 60

        # Format the time string as MM:SS
        frame_time = "{:02}:{:02}".format(int(minutes), int(seconds))
        # Force resize the face to 96x96 and then add it to the
        # faces montage list
        face = cv2.resize(face, (150, 150))
        cv2.putText(face,frame_time, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        faces.append(face)

        # Save face image
        #image_path = os.path.join(directory_path, f"{i}.jpg")
        #cv2.imwrite(image_path, face)

    # Create a montage using 96x96 "tiles" with 5 rows and 5 columns
    montage = build_montages(faces, (150, 150), (10, 8))[0]
    # Save the output montage
    title = "Face ID #{}".format(label)
    title = "Unknown Faces" if label == -1 else title
    cv2.imwrite(os.path.join(directory_path, title+'.jpg'), montage)

    
face_data_manager.save_data() """

In [None]:
import os
import numpy as np
import cv2
import pickle
from imutils import build_montages
from sklearn.cluster import DBSCAN
from hdbscan import HDBSCAN
from sklearn.preprocessing import normalize
from sklearn.preprocessing import normalize


# Make sure the output directory exists
os.makedirs(character_folder, exist_ok=True)

# (Unused idea kept for reference: normalised timestamps, d['time'].total_seconds(),
# as an additional clustering feature.)
embeddings = [d['normed_embedding'] for d in face_data_manager.data]
embeddings = normalize(embeddings, norm='l2')

clustering = DBSCAN(eps=0.5, min_samples=5,metric="cosine")
clustering.fit(embeddings)

labels = np.unique(clustering.labels_)
print(labels)
# Visit each cluster exactly once. Iterating over `clustering.labels_` itself would
# repeat all of the work below once per face instead of once per cluster.
for label in labels:
    # Skip the noise
    if label == -1:
        continue

    directory_path = os.path.join(character_folder, str(label))
    os.makedirs(directory_path, exist_ok=True)

    # Record the cluster id on every face that belongs to it
    idxs = np.where(clustering.labels_ == label)[0]
    for i in idxs:
        face_data_manager.data[i]['label'] = str(label)

    # The montage shows a random sample of at most 25 faces (5 x 5 grid)
    sampled_idxs = np.random.choice(idxs, size=min(25, len(idxs)), replace=False)
    faces = []
    for i in sampled_idxs:
        # Timestamp of the face, formatted as MM:SS
        time_elapsed = face_data_manager.data[i]['time']
        minutes = time_elapsed.total_seconds() // 60
        seconds = time_elapsed.total_seconds() % 60
        frame_time = "{:02}:{:02}".format(int(minutes), int(seconds))
        # Resize the crop to the 150x150 montage tile and stamp the time on it.
        # cv2.resize returns a new array, so the stored crop is not modified.
        face = cv2.resize(face_data_manager.data[i]["image"], (150, 150))
        cv2.putText(face,frame_time, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        faces.append(face)

    montage = build_montages(faces, (150, 150), (5, 5))[0]
    # Save the output montage
    title = "Face ID #{}".format(label)
    cv2.imwrite(os.path.join(directory_path, title+'.jpg'), montage)

face_data_manager.save_data()


In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
from fastai.vision.all import *
from pathlib import Path
from PIL import Image as PilImage
import shutil

path = Path(character_folder)
# One entry per numeric cluster label, in first-seen order. Without de-duplication the
# list holds one entry per face, so the naming workflow revisits the same cluster.
labels = list(dict.fromkeys(
    data["label"] for data in face_data_manager.data if data["label"].isnumeric()
))

def rename_folder(old_name, new_name):
    """Rename a character folder, or drop it when the target name is already taken.

    Args:
        old_name: ``Path`` of the existing folder.
        new_name: ``Path`` the folder should end up at.

    If ``new_name`` already exists, ``old_name`` is deleted and the existing target is
    kept. Renaming a folder to its own name is a no-op; previously that case hit the
    "target exists" branch and deleted the folder.
    """
    if old_name == new_name:
        return
    if new_name.exists():  # If the target directory exists
        shutil.rmtree(old_name)
    else:
        old_name.rename(new_name)
        
def create_on_button_clicked_handler(label, name):
    """Build the "Confirm" callback for one cluster.

    Args:
        label: Current label of the cluster (also its folder name).
        name: Text widget whose ``value`` holds the name typed by the user.

    Returns:
        A callback that renames the cluster's records and folder to the entered name
        and then advances the ``process`` generator to the next cluster.
    """
    def on_button_clicked(b):
        new_label = name.value
        if not new_label.strip():
            # An empty name would make the target path the character folder itself.
            print('Please enter a character name before confirming.')
            return
        print(f'Character name confirmed: {new_label}')

        face_data_manager.update_labels(label, new_label)

        old_name = Path(path) / label
        new_name = Path(path) / new_label
        # Keeping the suggested name needs no folder change.
        if new_name != old_name:
            rename_folder(old_name, new_name)

        clear_output(wait=True)
        # The default stops the last confirmation from raising StopIteration inside
        # the widget callback once every cluster has been handled.
        next(process, None)

    return on_button_clicked
def process_folders(labels, path):
    """Generator that presents one cluster per step for naming.

    For each label whose folder under ``path`` contains at least one ``*.jpg`` it
    prints the label, shows a name field, a confirm button and the folder's
    largest image (by pixel count), then yields until it is advanced again.
    """
    def pixel_count(image_path):
        # Resolution is defined as width * height.
        with PilImage.open(image_path) as img:
            width, height = img.size
        return width * height

    for label in labels:
        image_paths = list((path / label).glob('*.jpg'))
        if not image_paths:
            continue
        print(f"Current cluster: {label}")

        # First image with the highest resolution.
        largest_image_path = max(image_paths, key=pixel_count)

        name = widgets.Text(value=label, placeholder='Enter character name', description='Name:')
        button = widgets.Button(description='Confirm')
        display(name, button)

        image = largest_image_path.read_bytes()
        img_widget = widgets.Image(value=image, format='png', width=1000, height=400)
        display(img_widget)

        button.on_click(create_on_button_clicked_handler(label, name))

        yield

process = process_folders(labels, path)
# The default avoids a StopIteration traceback when there is no cluster left to name.
next(process, None)

In [None]:
# List the faces whose label is still purely numeric, i.e. a cluster id rather than a name.
for data in face_data_manager.data:
    if not data["label"].isnumeric():
        continue
    print(data["index"],"is numeric")

In [None]:

import re
from ipywidgets import HBox, VBox
import ipywidgets as widgets
from IPython.display import display, clear_output
from fastai.vision.all import *
from pathlib import Path
from PIL import Image as PilImage
import shutil
from ipywidgets import GridBox

path = Path(character_folder)
processed_images = glob.glob(os.path.join(frame_folder, '*.jpg'))
# Sort numerically by the digits in the file name. The pattern is a raw string because
# '\D' in a normal string literal is an invalid escape sequence that newer Python warns about.
processed_images.sort(key=lambda f: int(re.sub(r'\D', '', os.path.basename(f))))
def on_button_clicked_factory(face, frame_number, name, path, character_folder):
    """Build the "Confirm" callback for a single face shown in the frame browser.

    Args:
        face: Face record; its ``'image'`` crop identifies the record to relabel.
        frame_number: Frame the face belongs to.
        name: Text widget whose ``value`` is the label to store.
        path, character_folder: Accepted for compatibility with existing callers;
            not used by the callback.

    Returns:
        A callback that stores ``name.value`` as the label of that face and saves.

    An earlier version also merged the label's folder into the target folder and moved
    the slider to the next unlabeled frame; that code was already disabled (kept only
    as string literals) and has been removed.
    """
    def on_button_clicked(b):
        face_data_manager.update_label(frame_number, face['image'], name.value)

    return on_button_clicked

def process_frame(frame_number):
    """Render one frame, its detected faces and a per-label overview into ``frame_output``."""
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

    # The frame itself is read back from the extracted JPEG files.
    frame = cv2.imread(str(processed_images[frame_number]))
    frame_widget = convert_frame_to_widget(frame)

    frame_output.clear_output()

    with frame_output:
        print(f"Frame number: {frame_number}")

        wanted_frame = int(frame_number)
        faces = [p for p in face_data_manager.data if p["frame"] == wanted_frame]
        face_widgets = process_faces(faces,frame_number)
        display(HBox([frame_widget] + face_widgets))

        # One representative thumbnail per distinct label, laid out four per row.
        labels = {face['label'] for face in face_data_manager.data}
        image_boxes = list(map(display_images, labels))
        grid = widgets.GridBox(image_boxes, layout=widgets.Layout(grid_template_columns="repeat(4, 250px)"))
        display(grid)

def convert_frame_to_widget(frame):
    """Return a PNG image widget of ``frame`` (BGR array) scaled to a width of 500 px."""
    base_width = 500
    frame_pil = PilImage.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Keep the aspect ratio: scale the height by the same factor as the width.
    source_width, source_height = frame_pil.size
    w_percent = base_width / float(source_width)
    h_size = int(float(source_height) * float(w_percent))
    frame_pil = frame_pil.resize((base_width, h_size))

    png_buffer = io.BytesIO()
    frame_pil.save(png_buffer, format='PNG')
    return widgets.Image(value=png_buffer.getvalue(), format='png')


def process_faces(faces, frame_number):
    """Build one editable widget box (crop, name field, confirm button) per face."""
    def build_face_box(face):
        print(f"Time: {face['time']}")
        cropped_face_widget = convert_face_to_widget(face['image'])
        name = widgets.Text(value=str(face['label']), placeholder='Enter character name', description='Name:')
        button = widgets.Button(description='Confirm')
        button.on_click(on_button_clicked_factory(face, frame_number, name, path, character_folder))
        return VBox([cropped_face_widget, name, button])

    return [build_face_box(face) for face in faces]


def convert_face_to_widget(face):
    """Return a 200x200 PNG image widget for a BGR face crop."""
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)  # OpenCV crops are BGR; PIL expects RGB
    cropped_face_pil = PilImage.fromarray(face).resize((200, 200))
    img_byte_arr = io.BytesIO()
    cropped_face_pil.save(img_byte_arr, format='PNG')
    # The bytes are PNG-encoded, so the widget is told 'png' (it used to claim 'jpg').
    return widgets.Image(value=img_byte_arr.getvalue(), format='png')
def calculate_centroid(embeddings):
    """Return the element-wise mean of a sequence of embedding vectors."""
    stacked = np.asarray(embeddings)
    return stacked.mean(axis=0)
def display_images(label):
    """Return a box with a caption and the most central face image of ``label``."""
    thumbnail = convert_face_to_widget(find_central_images(label))
    caption = widgets.Label(f"Label: {label}")
    return VBox([caption, thumbnail])
def find_central_images(label):
    """Return the face crop whose embedding is closest to the centroid of ``label``.

    Args:
        label: Label to look up; compared as ``str(label)`` against stored labels.

    Returns:
        The ``'image'`` of the record with the smallest Euclidean distance between its
        ``'normed_embedding'`` and the mean embedding of all records with that label.

    Raises:
        ValueError: If no record carries the label.
    """
    wanted = str(label)
    # Select the label's records once; embeddings and images then share one ordering.
    members = [d for d in face_data_manager.data if d['label'] == wanted]
    if not members:
        raise ValueError(f"No faces are labelled {wanted!r}")

    label_embeddings = np.asarray([d['normed_embedding'] for d in members])
    centroid = calculate_centroid(label_embeddings)

    # Distance of each embedding to the centroid
    distances = np.linalg.norm(label_embeddings - centroid, axis=1)

    return members[int(np.argmin(distances))]['image']

# Load video
cap = cv2.VideoCapture(video_output)
total_frames = len(processed_images)

# Define an output widget for the frame display
frame_output = widgets.Output()

# First frame that still contains a face labelled '0' or unlabeled (0 if there is none)
first_frame_with_label_0 = min([f['frame'] for f in face_data_manager.data if f['label'] == '0' or f['label'] == ''], default=0)
fps = cap.get(cv2.CAP_PROP_FPS)
# The slider moves in steps of about one second of video. `fps` is a float, and a value
# below 1 would truncate to a step of 0, so it is rounded and kept at 1 or more.
slider_step = max(1, int(round(fps)))
slider = widgets.IntSlider(min=0, max=int(total_frames-1), step=slider_step, description='Frame:')

def on_slider_change(change):
    process_frame(change['new'])

slider.observe(on_slider_change, names='value')

# Display the slider and frame_output widget
display(slider, frame_output)
if slider.value == first_frame_with_label_0:
    # Assigning an unchanged value fires no change event, so render it explicitly.
    process_frame(first_frame_with_label_0)
else:
    slider.value = first_frame_with_label_0
# Process the first frame with label '0'
""" process_frame(first_frame_with_label_0) """

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import insightface
from insightface.app import FaceAnalysis
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer

""" data = [] """
""" for label in os.listdir(character_folder):
    for filename in os.listdir(os.path.join(character_folder, label)):
        if filename.endswith('.npy'):  # check if the file is an image
            # read the image
            img_path = os.path.join(character_folder, label, filename)
            embedding = np.load(img_path)
            data.append({'embedding': embedding, 'label': label})
                
 """
# Tabular view of the stored face records
df = pd.DataFrame(face_data_manager.data)

# Features: one embedding per row. Target: the label strings.
X = np.array(df['normed_embedding'].to_list())
y = df['label'].values

# Integer-encode the labels
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)

# L2-normalise each embedding row
normalizer = Normalizer(norm='l2')
X = normalizer.transform(X)

# Hold out 20% of the faces for validation
X_train, X_val, y_train, y_val = train_test_split(X, y_encoded, test_size=0.2)

# Distance-weighted 5-nearest-neighbour classifier using cosine distance
knn = KNeighborsClassifier(n_neighbors=5,metric="cosine",weights="distance")
knn.fit(X_train, y_train)

# Validate the model
accuracy = knn.score(X_val, y_val)
print(f'Validation accuracy: {accuracy}')
def classify_face(embedding):
    """Predict the label of one face embedding with the k-NN model.

    Returns:
        The predicted label string, or ``"Unknown"`` when the mean dot product between
        the normalised embedding and the training embeddings of the predicted class
        is below 0.34.
    """
    query = Normalizer(norm='l2').transform([embedding])
    predicted_label = knn.predict(query)
    predicted_label_str = le.inverse_transform(predicted_label)[0]

    # Additional similarity check against the predicted class
    similarity_threshold = 0.34
    same_class_embeddings = X_train[y_train == predicted_label[0]]
    mean_similarity = np.mean(np.dot(same_class_embeddings, query[0]))
    return "Unknown" if mean_similarity < similarity_threshold else predicted_label_str


In [None]:
from fastai.tabular.all import *
import os
import sys

class SuppressPrints:
    """Context manager that sends ``sys.stdout`` to the null device inside the ``with`` block."""

    def __enter__(self):
        # Remember the current stream and swap in a writer on os.devnull.
        self.original_stdout, sys.stdout = sys.stdout, open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close whatever stream is installed now, then put the saved one back.
        active_stream = sys.stdout
        active_stream.close()
        sys.stdout = self.original_stdout
# Tabular view of the stored face records
df = pd.DataFrame(face_data_manager.data)

# Features: one embedding per row. Target: the label strings.
X = np.array(df['normed_embedding'].to_list())
y = df['label'].values

# Integer-encode the labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Frame for fastai: one column per embedding dimension plus the encoded 'label'
df_fastai = pd.DataFrame(X)
df_fastai['label'] = y_encoded

# No categorical inputs; every embedding column is a continuous variable
cat_names = []
cont_names = [column for column in df_fastai.columns if column != 'label']

# 80/20 train/validation split over the row indices
splitter = TrainTestSplitter(test_size=0.2)
splits = splitter(range_of(df_fastai))

dls = TabularDataLoaders.from_df(
    df_fastai,
    y_names="label",
    y_block = CategoryBlock,
    cat_names=cat_names,
    cont_names=cont_names,
    splits=splits,
    bs=64,
)

# Feed-forward network with two hidden layers of size 200 and 100
learn = tabular_learner(dls, layers=[200,100], metrics=accuracy)

# Train the model
learn.fit_one_cycle(5)

def classify_face(embedding):
    """Predict the label of one face embedding with the fastai tabular model.

    Args:
        embedding: Embedding vector with one value per column in ``cont_names``.

    Returns:
        The predicted label string, or ``"Unknown"`` when the mean dot product between
        ``embedding`` and the stored embeddings of the predicted class is below 0.34
        (or when there are no stored embeddings to compare against).
    """
    # Convert the embedding to DataFrame
    df_new = pd.DataFrame([embedding], columns=cont_names)
    # learn.predict prints progress output; silence it for per-face calls
    with SuppressPrints():
        pred_class, pred_idx, _ = learn.predict(df_new.iloc[0])
    # NOTE(review): assumes the predicted index equals the LabelEncoder class id - confirm
    predicted_label = int(pred_idx)
    predicted_label_str = le.inverse_transform([predicted_label])[0]

    # Additional similarity check.
    # The mask must use the integer-encoded labels: `y` holds the original label
    # strings, so comparing it with an int never matches and the check could never
    # return "Unknown".
    similarity_threshold = 0.34
    predicted_class_embeddings = X[y_encoded == predicted_label]
    if len(predicted_class_embeddings) == 0:
        # Nothing to compare against, so the prediction cannot be confirmed.
        return "Unknown"
    sims_to_predicted_class = np.dot(predicted_class_embeddings, embedding)
    if np.mean(sims_to_predicted_class) < similarity_threshold:
        return "Unknown"

    return predicted_label_str

In [None]:
os.makedirs(frame_folder, exist_ok=True)

# Extract frames from video: frame N is written to <frame_folder>/N.jpg
vidcap = cv2.VideoCapture(video_output)
count = 0
target_imgs = []  # file paths of the extracted frames, in order
try:
    success, image = vidcap.read()
    while success:
        path = os.path.join(frame_folder, f"{count}.jpg")
        target_imgs.append(path)
        cv2.imwrite(path, image)  # save frame as JPEG file
        success, image = vidcap.read()
        count += 1
finally:
    # Release the capture handle even if writing a frame fails.
    vidcap.release()

In [None]:
# Remove the swapped-frames folder (and everything in it) if it exists.
swapped_folder_present = os.path.exists(swapped_folder)
if swapped_folder_present:
    shutil.rmtree(swapped_folder)

In [None]:
# Create the folder that receives the face-swapped frames if it is missing.
swapped_folder_missing = not os.path.exists(swapped_folder)
if swapped_folder_missing:
    os.makedirs(swapped_folder)

In [None]:
import collections
import random
import natsort
import cv2
from collections import Counter, deque
import matplotlib.pyplot as plt
from imutils import build_montages

# Replacement identities: short key -> image file the replacement face is taken from.
swap_source_images = {
   'm': '/notebooks/1629085462605.jpeg',
   'b': '/notebooks/bcp.png',
   'd': '/notebooks/DSCAAZZEA.png',
   'yd': '/notebooks/2023-07-07 13.36.17.jpg',
   'ld': '/notebooks/DSC09400.jpg',
   'l2': '/notebooks/2023-07-03 22.46.08.jpg',
   'ylo': '/notebooks/2023-07-07 13.36.21.jpg',
   'la': '/notebooks/2023-07-03 22.47.10.jpg',
   'jo': '/notebooks/jocp.png',
   'g': '/notebooks/2023-07-08 14.11.53.jpg',
   'je': '/notebooks/2023-07-03 22.47.24.jpg',
   'cat': '/notebooks/http___prod.static9.net.au___media_2018_07_09_09_54_catto.jpg',
}
# The left-most detected face of every source image, in the order listed above.
swappings = {
    key: get_first_face(image_path) for key, image_path in swap_source_images.items()
}

# Fixed assignments: predicted label -> key in `swappings`, or None to leave the face as is.
targets = {'0': 'd'}
targets.update(dict.fromkeys(['', 'Unknown', 'Unknow']))

source_face = get_first_face('/notebooks/DSC06729.JPG')

# Labels that received a randomly chosen replacement face: label -> key in `swappings`
attribution = {}
# Replacement faces still available for random assignment: everything that is not
# reserved by an explicit `targets` entry. Built by filtering so that two targets
# sharing one replacement no longer raise ValueError from a second `remove`.
reserved_swap_keys = {value for value in targets.values() if value is not None}
unused_keys = [key for key in swappings if key not in reserved_swap_keys]

video = cv2.VideoCapture(video_output)
fps = video.get(cv2.CAP_PROP_FPS)
smoothing_window_size = 5
# Per-label history of recent classifications, holding about one second of frames
face_classification_history = collections.defaultdict(lambda: deque(maxlen=int(fps)+1))

# Optional frame range to process (None = no limit)
start_frame = None
end_frame = None
frame_count = -1
DEBUG = False
results = []  # List to hold face images for montage

# Replacement faces reserved by an explicit `targets` entry stay out of the random pool.
reserved_swap_keys = {value for value in targets.values() if value is not None}
# Debug montages are shown about once per second of video. int(fps) is 0 for fps < 1,
# which would make the modulo below divide by zero, hence the lower bound of 1.
montage_interval = max(1, int(fps))

try:
    while True:
        ret, frame = video.read()
        if not ret:
            break
        frame_count += 1
        if start_frame is not None and frame_count < start_frame:
            continue
        if end_frame is not None and frame_count > end_frame:
            break

        faces = app.get(frame)
        # (Left-to-right sorting of the faces by bbox is disabled.)
        res = frame.copy()

        used_indices = set()  # Labels already handled in this frame
        face_info = []  # (label, bbox) pairs for the debug overlay
        for face in faces:
            index = classify_face(face.normed_embedding)
            # Only the first face of each predicted label is processed per frame.
            if index in used_indices:
                continue
            used_indices.add(index)
            # Temporal smoothing over face_classification_history is disabled; the
            # per-frame prediction is used directly.
            most_common_index = index
            bbox = face.bbox.astype(int)
            face_info.append((index, bbox))

            # 1) Explicit mapping: swap with the configured face, or leave untouched (None).
            if most_common_index in targets:
                value = targets[most_common_index]
                if value is not None:
                    res = swapper.get(res, face, swappings[value], paste_back=True)
                continue

            # 2) Label already has a randomly attributed replacement face.
            if most_common_index in attribution:
                res = swapper.get(res, face, swappings[attribution[most_common_index]], paste_back=True)
                continue

            # 3) New label: attribute a random unused replacement face.
            print("index not found :" ,most_common_index)
            if index == "Unknown":
                continue
            if len(unused_keys) == 0:
                # Refill the pool. The reserved entries are the *values* of `targets`
                # (keys of `swappings`); filtering by the keys of `targets`, which are
                # labels, never excluded anything.
                unused_keys = [key for key in swappings if key not in reserved_swap_keys]
                if not unused_keys:
                    # Everything is reserved: fall back to the full set rather than
                    # failing on an empty choice.
                    unused_keys = list(swappings.keys())
            random_key = random.choice(unused_keys)
            unused_keys.remove(random_key)
            print("index ",most_common_index," attribute to ",random_key, " frame ", frame_count)
            attribution[most_common_index] = random_key
            random_value = swappings[random_key]
            res = swapper.get(res, face, random_value, paste_back=True)

        cv2.imwrite(osp.join(swapped_folder, '{}.jpg'.format(frame_count)), res)

        if DEBUG:
            debug = res.copy()
            for index, bbox in face_info:
                cv2.putText(debug, str(index), (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 0, 255), 3, cv2.LINE_AA)
            debug =  cv2.resize(debug,  (400, 400), interpolation = cv2.INTER_AREA)

            # Frame number (top left) and number of handled faces (top right)
            cv2.putText(debug,str(frame_count), (10, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2, cv2.LINE_AA)
            cv2.putText(debug,str(len(face_info)), (350, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2, cv2.LINE_AA)
            results.append(debug)

            if frame_count % montage_interval == 0:
                montage = build_montages(results, (150, 150), (5, 5))[0]
                # Time elapsed in the video
                time_elapsed = frame_count / fps  # time in seconds
                minutes = int(time_elapsed // 60)
                seconds = int(time_elapsed % 60)
                print(f"Current time in video: {minutes:02}:{seconds:02}")
                # figsize is in inches: (400, 400) asked for an enormous canvas.
                plt.figure(figsize=(8, 8))
                plt.imshow(cv2.cvtColor(montage, cv2.COLOR_BGR2RGB))
                plt.axis('off')
                plt.show()

                results = []  # Start collecting the next montage
finally:
    # Release the capture handle even if classification or swapping fails midway.
    video.release()

In [None]:
# Delete every swapped frame whose file name starts with "0".
# NOTE(review): the reason for dropping these frames is not visible here - confirm.
processed_images = glob.glob(osp.join(swapped_folder, '0*.jpg'))
for stale_frame in processed_images:
    os.remove(stale_frame)

In [None]:
import subprocess

output_video = "/notebooks/output.mp4"

# Reuse the source video's frame rate so the rebuilt clip plays at the same speed.
video = cv2.VideoCapture(video_output)
fps = video.get(cv2.CAP_PROP_FPS)
video.release()
if not fps or fps <= 0:
    # An unopened or unreadable capture reports 0, which ffmpeg would reject later
    # with a far less obvious error.
    raise RuntimeError(f"Could not read a frame rate from {video_output!r}")

# Collect the swapped frames. Only the first entry is inspected (for the frame
# size); ffmpeg itself picks the frames up through the numbered pattern below.
processed_images = glob.glob(os.path.join(swapped_folder, '*.jpg'))
processed_images.sort()
print(len(processed_images))
if not processed_images:
    raise FileNotFoundError(f"No .jpg frames found in {swapped_folder!r}")

# cv2.imread returns None instead of raising when a file cannot be decoded.
first_frame = cv2.imread(processed_images[0])
if first_frame is None:
    raise ValueError(f"Could not read frame {processed_images[0]!r}")
height, width, _ = first_frame.shape
print(height, width)

# Build the command as an argument list (no shell), so paths containing spaces
# or shell metacharacters are passed to ffmpeg verbatim.
command = [
    'ffmpeg', '-y',
    '-r', str(fps),
    '-s', f'{width}x{height}',
    '-i', os.path.join(swapped_folder, '%01d.jpg'),
    '-vcodec', 'libx264',
    '-pix_fmt', 'yuv420p',
    output_video,
]

# check=True raises CalledProcessError if ffmpeg exits with a non-zero status,
# instead of silently leaving a missing or truncated output file.
subprocess.run(command, check=True)

In [None]:
import subprocess


temp_audio_file = '/notebooks/temp_audio.aac'
output_with_audio_file = '/notebooks/output_with_audio.mp4'

# Remove leftovers from a previous run so a failed ffmpeg call can never leave a
# stale file behind that looks like fresh output.
for stale_path in (temp_audio_file, output_with_audio_file):
    if os.path.exists(stale_path):
        os.remove(stale_path)

# Extract the audio track of the original video into a temporary AAC file.
# Commands are argument lists (no shell), so paths are passed to ffmpeg verbatim.
audio_extraction_command = [
    'ffmpeg', '-y',
    '-i', video_output,
    '-vn',
    '-acodec', 'aac',
    '-strict', '-2',
    temp_audio_file,
]
# check=True raises CalledProcessError on a non-zero exit status (for example
# when the source has no audio stream) instead of failing silently.
subprocess.run(audio_extraction_command, check=True)

# Combine the swapped video stream with the extracted audio, copying both
# streams without re-encoding.
video_combination_command = [
    'ffmpeg', '-y',
    '-i', output_video,
    '-i', temp_audio_file,
    '-c:v', 'copy',
    '-c:a', 'copy',
    '-map', '0:v:0',
    '-map', '1:a:0',
    output_with_audio_file,
]
subprocess.run(video_combination_command, check=True)

In [None]:
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip

# The clip is cut from the file referenced by output_with_audio_file.
video_path = output_with_audio_file

# Clip boundaries, each written as (minutes, seconds).
start_time_min_sec = (0, 0)   # 0 minutes 0 seconds
end_time_min_sec = (1, 40)    # 1 minute 40 seconds

# Flatten each (minutes, seconds) pair into a plain number of seconds.
start_time, end_time = (
    60 * minutes + seconds
    for minutes, seconds in (start_time_min_sec, end_time_min_sec)
)

# Destination of the extracted excerpt.
output_path = "/notebooks/split.mp4"

# Cut the [start_time, end_time] range out of video_path into output_path.
ffmpeg_extract_subclip(video_path, start_time, end_time, targetname=output_path)

CONVERT GIF TO MP4

In [None]:
import cv2
import imageio

# Read every frame of the GIF. imageio hands frames back in RGB (or RGBA)
# channel order, not OpenCV's BGR.
gif = imageio.mimread('input.gif')

# Convert the gif to .mp4
with imageio.get_writer('output.mp4', mode='I') as writer:
    for frame in gif:
        # The writer expects RGB as well, so the frame must NOT go through a
        # BGR<->RGB swap (that would exchange red and blue in the video).
        # Only the alpha channel, when present, has to be dropped.
        if frame.ndim == 3 and frame.shape[2] == 4:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
        writer.append_data(frame)