In [None]:
!pip install facenet-pytorch --force-reinstall --no-cache-dir

In [None]:
import torch
import pickle
from facenet_pytorch import InceptionResnetV1
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial.distance import cosine

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Load the precomputed embeddings: a dictionary of {filename: embedding_vector}.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so only
# open an embeddings.pkl that comes from a trusted source.
with open("embeddings.pkl", "rb") as pkl_file:
    embeddings = pickle.load(pkl_file)

In [None]:
# Quick look at the loaded dictionary: entry count and the first few filenames.
n_samples = len(embeddings)
first_filenames = list(embeddings)[:5]
print(n_samples)
print(first_filenames)

# Pull out a single embedding (per the original note: numpy array, shape (512,)).
vec = embeddings["00001.jpg"]

In [None]:
# Convert the stored embedding to a torch tensor with a leading batch dimension.
# Move it to the notebook-wide `device` instead of calling .cuda() directly:
# a hard-coded .cuda() raises on machines without a GPU, even though the
# notebook already selected a CPU fallback.
emb_target = torch.tensor(vec).unsqueeze(0).to(device)  # shape [1,512]
print(emb_target.shape)

In [None]:
# FaceNet (InceptionResnetV1) with the 'vggface2' pretrained weights, switched
# to eval mode and placed on the selected device.
model = InceptionResnetV1(pretrained='vggface2')
model = model.eval().to(device)

# Preprocessing applied to every PIL image before it is fed to the model:
# resize to 160x160, then convert to a tensor.
preprocess_steps = [
    transforms.Resize((160, 160)),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocess_steps)

In [None]:
def _load_face(path):
    """Load one image file and preprocess it for the model.

    Args:
        path: Path of the image file to open.

    Returns:
        A tuple ``(rgb_image, batch)`` where ``rgb_image`` is the RGB PIL image
        and ``batch`` is ``transform(rgb_image)`` with a leading batch
        dimension, moved to ``device``.
    """
    # The context manager closes the underlying file promptly; convert()
    # returns a converted copy, so the result stays usable after the close.
    with Image.open(path) as raw_img:
        rgb_img = raw_img.convert("RGB")
    batch = transform(rgb_img).unsqueeze(0).to(device)
    return rgb_img, batch


# Same two images as before, loaded through one helper instead of copy-pasted lines.
img_00001, x_00001 = _load_face("00001.jpg")
img_00002, x_00002 = _load_face("00002.jpg")

In [None]:
# Embed both faces. `x * 2 - 1` rescales the ToTensor output before it is fed
# to the model. This is pure inference, so run it under no_grad(): no autograd
# graph is built, which saves memory and makes a separate .detach() unnecessary.
with torch.no_grad():
    emb_00001 = model(x_00001 * 2 - 1).cpu().numpy()[0]
    emb_00002 = model(x_00002 * 2 - 1).cpu().numpy()[0]

In [None]:
# Display the first face image inline.
img_00001

In [None]:
# Display the second face image inline.
img_00002

In [None]:
# Compare the original image size with the shape of its preprocessed tensor.
# The tensor for this image is `x_00001`; the previous `x` was never defined in
# this notebook and raised NameError on a fresh kernel.
img_00001.size, x_00001.shape

In [None]:
# Seed the generator so the random starting image is identical on every run;
# otherwise the distances and the optimization result below are not reproducible.
SEED = 0
rng = np.random.default_rng(SEED)

# Uniform noise: one uint8 value in [0, 255] for each of 256 x 256 x 3 entries.
random_data = rng.integers(0, 256, size=(256, 256, 3), dtype=np.uint8)

# A (height, width, 3) uint8 array is read as RGB by default, so the explicit
# mode argument is redundant.
random_img = Image.fromarray(random_data)

In [None]:
# Display the random-noise image inline.
random_img

In [None]:
# Preprocess the noise image the same way as the face photos: apply `transform`,
# add a leading batch dimension, and move the result to the selected device.
noise_tensor = transform(random_img)
random_image_tensor = noise_tensor[None].to(device)

In [None]:
# Embed the noise image with the same `* 2 - 1` rescaling used for the faces.
# Inference only, so skip building an autograd graph (no .detach() needed).
with torch.no_grad():
    random_image_emb = model(random_image_tensor * 2 - 1).cpu().numpy()[0]

In [None]:
# Check that the noise embedding and the face embedding have the same shape.
random_image_emb.shape, emb_00001.shape

In [None]:
# Cosine distance between the noise-image embedding and the first face embedding.
cosine_dist = cosine(u=random_image_emb, v=emb_00001)
cosine_dist

Because the random image has no relationship to the face, its embedding is nearly orthogonal to the face embedding, so the cosine distance is close to 1.

In [None]:
# Cosine distance between the embeddings of the two face images.
cosine_dist = cosine(u=emb_00001, v=emb_00002)
cosine_dist

Because the two face embeddings are intentionally different, their cosine distance is closer to 2, the maximum value, which is reached when two vectors point in opposite directions.

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import transforms
from facenet_pytorch import InceptionResnetV1
from PIL import Image

# Hyperparameters of the optimization run.
learning_rate = 0.01
num_steps = 10000  # Try 500, 1000, or 5000
log_every = 100    # print the loss once every this many steps

# Only the image is being optimized. Freeze the network weights so that
# backward() does not compute -- and keep accumulating -- gradients for every
# model parameter; gradients still flow through the model to the input image.
model.requires_grad_(False)

# Ensure random_image_tensor is on the correct device and requires grad
random_image_tensor = random_image_tensor.to(device).requires_grad_(True)

# Convert the target embedding (emb_00001) to a tensor and move to device
# Unsqueeze to add a batch dimension, matching the model output shape
target_embedding_tensor = torch.tensor(emb_00001, dtype=torch.float32).unsqueeze(0).to(device)

# Adam updates the image tensor itself; it is the only optimized parameter.
optimizer = optim.Adam([random_image_tensor], lr=learning_rate)

# Mean squared error between the current embedding and the target embedding.
loss_fn = nn.MSELoss()

print("Starting optimization...")
for i in range(num_steps):
    # Clear old gradients
    optimizer.zero_grad()

    # tanh keeps the model input inside (-1, 1) regardless of how far the
    # optimizer pushes the raw tensor values.
    normalized_image_input = torch.tanh(random_image_tensor * 2 - 1)
    current_embedding = model(normalized_image_input)

    loss = loss_fn(current_embedding, target_embedding_tensor)

    # Backpropagate to the image tensor and take one optimizer step.
    loss.backward()
    optimizer.step()

    if (i + 1) % log_every == 0:
        print(f"Step [{i+1}/{num_steps}], Loss: {loss.item():.6f}")

print("Optimization finished.")

In [None]:
# Rebuild the image the model actually saw. The optimization loop feeds
# tanh(random_image_tensor * 2 - 1) to the model, so the same `* 2 - 1` must be
# applied before tanh here; taking tanh of the raw tensor would save a
# different image from the one that was optimized.
optimized_tensor = random_image_tensor.detach().cpu().squeeze(0)
final_image_tensor = torch.tanh(optimized_tensor * 2 - 1)
final_image_tensor = (final_image_tensor * 0.5) + 0.5  # Rescale from [-1, 1] to [0, 1]

final_image = transforms.ToPILImage()(final_image_tensor)
final_image.save("generated_face.png")
print("Saved generated image to generated_face.png")

# Display inline as the cell output, like the other image cells, instead of
# .show(), which tries to launch an external image viewer.
final_image