In [None]:
#@title Utilities

import numpy as np
import PIL
import PIL.Image
import scipy
import scipy.ndimage
import dlib


def get_landmark(filepath, predictor):
    """Locate facial landmarks in an image with dlib.

    :param filepath: path of the image file (read with ``dlib.load_rgb_image``)
    :param predictor: callable ``predictor(img, rect)`` returning an object
        whose ``parts()`` yields points with ``x`` / ``y`` attributes,
        e.g. a ``dlib.shape_predictor``
    :return: np.array of ``[x, y]`` rows, one per landmark
        (shape=(68, 2) with the 68-point predictor)
    :raises ValueError: if no face is detected in the image
    """
    detector = dlib.get_frontal_face_detector()

    img = dlib.load_rgb_image(filepath)
    dets = detector(img, 1)  # second argument: number of times to upsample the image
    if len(dets) == 0:
        # Previously this fell through to an unbound `shape` variable.
        raise ValueError(f"No face detected in {filepath!r}")

    # With several detections the last one is used, as before, but the
    # predictor now runs only on that detection instead of on every face.
    shape = predictor(img, dets[len(dets) - 1])

    return np.array([[point.x, point.y] for point in shape.parts()])


def align_face(filepath, predictor, output_size=256, transform_size=256, enable_padding=True):
    """
    Cut an aligned, square face crop out of an image.

    An oriented square is derived from the eye and mouth landmarks returned
    by ``get_landmark``; the image is optionally shrunk, cropped and padded
    around that square, which is then warped to ``transform_size`` pixels.

    :param filepath: str, path of the image file
    :param predictor: landmark predictor forwarded to ``get_landmark``
    :param output_size: int, target side length; the result is downsized to
        it only when it is smaller than ``transform_size``
    :param transform_size: int, side length produced by the quad transform
    :param enable_padding: bool, reflect-pad and blur the border when the
        crop square reaches past the image edge
    :return: PIL Image (RGB)
    """
    # Newer Pillow releases expose these constants on enums
    # (Image.Resampling / Image.Transform) and dropped Image.ANTIALIAS;
    # older releases only have them on the Image module itself.
    resampling = getattr(PIL.Image, "Resampling", PIL.Image)
    quad_transform = getattr(PIL.Image, "Transform", PIL.Image).QUAD

    lm = get_landmark(filepath, predictor)

    # Only the eye and outer-mouth landmarks are needed for the alignment.
    lm_eye_left = lm[36: 42]  # left-clockwise
    lm_eye_right = lm[42: 48]  # left-clockwise
    lm_mouth_outer = lm[48: 60]  # left-clockwise

    # Calculate auxiliary vectors.
    eye_left = np.mean(lm_eye_left, axis=0)
    eye_right = np.mean(lm_eye_right, axis=0)
    eye_avg = (eye_left + eye_right) * 0.5
    eye_to_eye = eye_right - eye_left
    mouth_left = lm_mouth_outer[0]
    mouth_right = lm_mouth_outer[6]
    mouth_avg = (mouth_left + mouth_right) * 0.5
    eye_to_mouth = mouth_avg - eye_avg

    # Choose oriented crop rectangle.
    x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
    x /= np.hypot(*x)
    x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
    y = np.flipud(x) * [-1, 1]
    c = eye_avg + eye_to_mouth * 0.1
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    qsize = np.hypot(*x) * 2

    # Read image. Forcing RGB keeps the 3-channel assumptions of the padding
    # code below valid for grayscale / palette / RGBA inputs.
    img = PIL.Image.open(filepath).convert('RGB')

    # Shrink.
    shrink = int(np.floor(qsize / output_size * 0.5))
    if shrink > 1:
        rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
        img = img.resize(rsize, resampling.LANCZOS)
        quad /= shrink
        qsize /= shrink

    # Crop.
    border = max(int(np.rint(qsize * 0.1)), 3)
    crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
            int(np.ceil(max(quad[:, 1]))))
    crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]),
            min(crop[3] + border, img.size[1]))
    if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
        img = img.crop(crop)
        quad -= crop[0:2]

    # Pad.
    pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
           int(np.ceil(max(quad[:, 1]))))
    pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0),
           max(pad[3] - img.size[1] + border, 0))
    if enable_padding and max(pad) > border - 4:
        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
        img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
        h, w, _ = img.shape
        # Pixel coordinate grids (named yy/xx so they do not shadow the crop vectors x/y).
        yy, xx, _ = np.ogrid[:h, :w, :1]
        # mask grows from 0 inside the image towards 1 at the outer edge of the padding.
        mask = np.maximum(1.0 - np.minimum(np.float32(xx) / pad[0], np.float32(w - 1 - xx) / pad[2]),
                          1.0 - np.minimum(np.float32(yy) / pad[1], np.float32(h - 1 - yy) / pad[3]))
        blur = qsize * 0.02
        # Blend towards a blurred copy, then towards the median colour, weighted by the mask.
        img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
        img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
        # No explicit mode: a (h, w, 3) uint8 array is read as RGB, and the
        # `mode` argument triggers a deprecation warning on recent Pillow.
        img = PIL.Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)))
        quad += pad[:2]

    # Transform.
    img = img.transform((transform_size, transform_size), quad_transform, (quad + 0.5).flatten(),
                        resampling.BILINEAR)
    if output_size < transform_size:
        # LANCZOS is the filter the removed ANTIALIAS constant was an alias for.
        img = img.resize((output_size, output_size), resampling.LANCZOS)

    # Return aligned image.
    return img

In [None]:
!pip install PyDrive

Collecting PyDrive
  Downloading PyDrive-1.3.1.tar.gz (987 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/987.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━[0m [32m532.5/987.4 kB[0m [31m13.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m987.4/987.4 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: PyDrive
  Building wheel for PyDrive (setup.py) ... [?25l[?25hdone
  Created wheel for PyDrive: filename=PyDrive-1.3.1-py3-none-any.whl size=27433 sha256=93f3ef0e4faebfaefaae9c412b0e459ec6499767bbf8d16c09f7b842feb00520
  Stored in directory: /root/.cache/pip/wheels/31/d5/09/88865e0059104686eb8365ca1d36a8b27deef34232c3b62c90
Successfully built PyDrive
Installing collected packages: PyDrive
Successfully installed PyDrive-1.3.1


In [None]:
#@title Setup Repository
import os
os.chdir('/content')
CODE_DIR = 'encoder4editing'

!git clone https://github.com/omertov/encoder4editing.git $CODE_DIR
!wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip
!sudo unzip ninja-linux.zip -d /usr/local/bin/
!sudo update-alternatives --install /usr/bin/ninja ninja /usr/local/bin/ninja 1 --force
os.chdir(f'./{CODE_DIR}')

from argparse import Namespace
import time
import os
import sys
import numpy as np
from PIL import Image
import torch
import torchvision.transforms as transforms

# Make the working directory (the cloned repo, see os.chdir above) and its
# parent importable so the project packages below resolve.
sys.path.append(".")
sys.path.append("..")

# Project-local modules from the cloned repository.
from utils.common import tensor2im
from models.psp import pSp  # we use the pSp framework to load the e4e encoder.

# Reload imported modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

#@title Setup files downloader
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

download_with_pydrive = False #@param {type:"boolean"}

class Downloader(object):
    def __init__(self, use_pydrive):
        self.use_pydrive = use_pydrive
        current_directory = os.getcwd()
        self.save_dir = os.path.join(os.path.dirname(current_directory), CODE_DIR, "pretrained_models")
        os.makedirs(self.save_dir, exist_ok=True)
        if self.use_pydrive:
            self.authenticate()

    def authenticate(self):
        auth.authenticate_user()
        gauth = GoogleAuth()
        gauth.credentials = GoogleCredentials.get_application_default()
        self.drive = GoogleDrive(gauth)

    def download_file(self, file_id, file_name):
        file_dst = f'{self.save_dir}/{file_name}'
        if os.path.exists(file_dst):
            print(f'{file_name} already exists!')
            return
        if self.use_pydrive:
            downloaded = self.drive.CreateFile({'id':file_id})
            downloaded.FetchMetadata(fetch_all=True)
            downloaded.GetContentFile(file_dst)
        else:
            !gdown --id $file_id -O $file_dst

# Downloader configured by the download_with_pydrive form field above.
downloader = Downloader(download_with_pydrive)

# Selects which checkpoint is downloaded and which preprocessing is applied
# in the rest of the notebook.
experiment_type = 'ffhq_encode' #@param ['ffhq_encode', 'cars_encode', 'horse_encode', 'church_encode']

#@title Download
# Per experiment: the file id and the local file name handed to
# Downloader.download_file.
MODEL_PATHS = {
    "ffhq_encode": {"id": "1cUv_reLE6k3604or78EranS7XzuVMWeO", "name": "e4e_ffhq_encode.pt"},
    "cars_encode": {"id": "17faPqBce2m1AQeLCLHUVXaDfxMRU2QcV", "name": "e4e_cars_encode.pt"},
    "horse_encode": {"id": "1TkLLnuX86B_BMo2ocYD0kX9kWh53rUVX", "name": "e4e_horse_encode.pt"},
    "church_encode": {"id": "1-L0ZdnQLwtdy6-A_Ccgq5uNJGTqE7qBa", "name": "e4e_church_encode.pt"}
}

# Fetch the checkpoint for the selected experiment (skipped when the file is already present).
path = MODEL_PATHS[experiment_type]
downloader.download_file(file_id=path["id"], file_name=path["name"])

# Checkpoint and sample-image paths per experiment, relative to the working directory.
EXPERIMENT_DATA_ARGS = {
    name: {
        "model_path": f"pretrained_models/e4e_{name}.pt",
        "image_path": f"notebooks/images/{image_file}",
    }
    for name, image_file in [
        ("ffhq_encode", "input_img.jpg"),
        ("cars_encode", "car_img.jpg"),
        ("horse_encode", "horse_img.jpg"),
        ("church_encode", "church_img.jpg"),
    ]
}

# Setup required image transformations
EXPERIMENT_ARGS = EXPERIMENT_DATA_ARGS[experiment_type]
# Cars use a 192x256 Resize, every other experiment 256x256; resize_dims
# holds the same two numbers in the opposite order to the Resize argument.
resize_dims = (256, 192) if experiment_type == 'cars_encode' else (256, 256)
EXPERIMENT_ARGS['transform'] = transforms.Compose([
    transforms.Resize(resize_dims[::-1]),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])

if experiment_type == "ffhq_encode" and 'shape_predictor_68_face_landmarks.dat' not in os.listdir():
    !wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
    !bzip2 -dk shape_predictor_68_face_landmarks.dat.bz2

def run_alignment(image_path):
    """Return the aligned face crop (PIL image) for the image at ``image_path``.

    The dlib shape predictor is loaded from
    ``shape_predictor_68_face_landmarks.dat`` on the first call only and then
    reused; loading it again for every image was pure overhead when encoding
    a whole dataset.
    """
    predictor = getattr(run_alignment, "_predictor", None)
    if predictor is None:
        import dlib
        predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
        # Cache on the function object so no extra module-level name is needed.
        run_alignment._predictor = predictor
    return align_face(filepath=image_path, predictor=predictor)

def run_on_batch(inputs, net):
    """Run ``net`` on a batch and return ``(images, latents)``.

    The inputs are moved to "cuda" and cast to float first. For the
    'cars_encode' experiment rows 32..223 of the images are kept.
    """
    batch = inputs.to("cuda").float()
    images, latents = net(batch, randomize_noise=False, return_latents=True)
    if experiment_type != 'cars_encode':
        return images, latents
    return images[:, :, 32:224, :], latents

def preprocess(image_path):
    """Load the image the encoder will see.

    For 'ffhq_encode' the face is aligned with ``run_alignment``; for every
    other experiment the file is simply opened and converted to RGB.
    """
    if experiment_type != "ffhq_encode":
        return Image.open(image_path).convert("RGB")
    return run_alignment(image_path)

def inference(preprocessed_image):
    """Encode one preprocessed image with the global ``net``.

    The experiment's transform is applied, a batch dimension is added, and
    the first element of the batch output is returned as
    ``(result_image, latent)`` with the latent flattened to 1-D.
    """
    network_input = EXPERIMENT_ARGS['transform'](preprocessed_image)
    with torch.no_grad():
        images, latents = run_on_batch(network_input.unsqueeze(0), net)
        result_image = images[0]
        # flatten the latent
        latent = latents[0].flatten()

    return result_image, latent

def process(image_path):
    """Preprocess the image at ``image_path`` and encode it; returns ``(result_image, latent)``."""
    reconstruction, latent_code = inference(preprocess(image_path))
    return reconstruction, latent_code

Cloning into 'encoder4editing'...
remote: Enumerating objects: 172, done.[K
remote: Counting objects: 100% (78/78), done.[K
remote: Compressing objects: 100% (36/36), done.[K
remote: Total 172 (delta 49), reused 42 (delta 42), pack-reused 94 (from 1)[K
Receiving objects: 100% (172/172), 33.43 MiB | 33.93 MiB/s, done.
Resolving deltas: 100% (59/59), done.
--2025-08-09 09:57:00--  https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip
Resolving github.com (github.com)... 140.82.116.3
Connecting to github.com (github.com)|140.82.116.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://release-assets.githubusercontent.com/github-production-release-asset/1335132/d2f252e2-9801-11e7-9fbf-bc7b4e4b5c83?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-08-09T10%3A50%3A36Z&rscd=attachment%3B+filename%3Dninja-linux.zip&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025

If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].
If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


Downloading...
From (original): https://drive.google.com/uc?id=1cUv_reLE6k3604or78EranS7XzuVMWeO
From (redirected): https://drive.google.com/uc?id=1cUv_reLE6k3604or78EranS7XzuVMWeO&confirm=t&uuid=868ffd93-4f91-46ef-9785-339691141e5f
To: /content/encoder4editing/pretrained_models/e4e_ffhq_encode.pt
100% 1.20G/1.20G [00:13<00:00, 88.3MB/s]
--2025-08-09 09:59:05--  http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Resolving dlib.net (dlib.net)... 107.180.26.78
Connecting to dlib.net (dlib.net)|107.180.26.78|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 [following]
--2025-08-09 09:59:05--  https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Connecting to dlib.net (dlib.net)|107.180.26.78|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 64040097 (61M)
Saving to: ‘shape_predictor_68_face_landmarks.dat.bz2’


2025-08-09 09:59:08 (32.5

# Load Pretrained Encoder

In [None]:
# Load the checkpoint onto the CPU; the network itself is moved to the GPU below.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
model_path = EXPERIMENT_ARGS['model_path']
ckpt = torch.load(model_path, map_location='cpu')
# Options dict stored in the checkpoint under the 'opts' key.
opts = ckpt['opts']
# pprint.pprint(opts)  # Display full options used
# update the training options
opts['checkpoint_path'] = model_path
# pSp receives the options as a Namespace instead of a dict.
opts= Namespace(**opts)
net = pSp(opts)
# Inference only: eval mode, model on the GPU.
net.eval()
net.cuda()
print('Model successfully loaded!')

Loading e4e over the pSp framework from checkpoint: pretrained_models/e4e_ffhq_encode.pt
Model successfully loaded!


In [None]:
# Show every option stored with the checkpoint (opts is the Namespace built when the model was loaded).
import pprint
pprint.pprint(opts)  # Display full options used

Namespace(exp_dir=None, dataset_type='ffhq_encode', encoder_type='Encoder4Editing', batch_size=8, test_batch_size=4, workers=8, test_workers=4, learning_rate=0.0001, optim_name='ranger', train_decoder=False, start_from_latent_avg=True, lpips_lambda=0.8, id_lambda=0.1, l2_lambda=1.0, stylegan_weights='', stylegan_size=1024, checkpoint_path='pretrained_models/e4e_ffhq_encode.pt', max_steps=300000, image_interval=100, board_interval=50, val_interval=10000, save_interval=200000, w_discriminator_lambda=0.1, w_discriminator_lr=2e-05, r1=10, d_reg_every=16, use_w_pool=True, w_pool_size=50, sub_exp_dir=None, delta_norm=2, delta_norm_lambda=0.0002, keep_optimizer=False, resume_training_from_ckpt=None, update_param_list=None, device='cuda:0', lpips_type='alex', progressive_steps=[0, 20000, 22000, 24000, 26000, 28000, 30000, 32000, 34000, 36000, 38000, 40000, 42000, 44000, 46000, 48000, 50000, 52000], progressive_start=20000, progressive_step_every=2000)


In [None]:
# Mount Google Drive at /content/drive; DATASET_PATH further down points into this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import numpy as np
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from sklearn.random_projection import GaussianRandomProjection

# Create a fixed projection matrix using Johnson–Lindenstrauss lemma
def get_random_projection(original_dim=18 * 512, target_dim=512, seed=42):
    """Build a fixed Gaussian random-projection matrix.

    :param original_dim: dimensionality of the vectors to be projected
    :param target_dim: dimensionality after projection
    :param seed: random state passed to ``GaussianRandomProjection`` so the
        same matrix is produced on every run
    :return: array of shape (original_dim, target_dim)
    """
    rp = GaussianRandomProjection(n_components=target_dim, random_state=seed)
    # fit() only needs the number of features to size the random matrix, so a
    # single dummy row is enough. Fitting on np.eye(original_dim) allocated
    # original_dim**2 floats (~680 MB for the defaults) just to be discarded.
    rp.fit(np.zeros((1, original_dim)))
    return rp.components_.T  # shape (original_dim, target_dim)

# Apply the projection matrix
def reduce_latent_dim(latent, W):
    """Project ``latent`` with the matrix ``W`` (matrix product ``latent @ W``)."""
    projected = np.matmul(latent, W)
    return projected

# Main dataset loading function
def load_image_latent_dataset(base_path, transform_fn, projection_matrix,
                              file_exts=frozenset({'.jpg', '.png', '.jpeg', '.bmp', '.webp'})):
    """Encode every image under ``base_path`` and label it by its folder name.

    :param base_path: root folder; it is walked recursively and each image is
        labelled with the name of the directory that directly contains it
    :param transform_fn: callable ``path -> (image, latent)``; only the latent
        is used and must support ``.to('cpu').numpy()``
    :param projection_matrix: matrix handed to ``reduce_latent_dim``
    :param file_exts: lower-case file extensions (with dot) treated as images
    :return: ``(X, y, label_encoder)`` - projected latents, integer labels and
        the fitted ``LabelEncoder``

    Files that fail to encode are reported and skipped. Directories and files
    are visited in sorted order so the sample order, and therefore any seeded
    train/validation split, is reproducible; ``os.walk`` itself gives no
    ordering guarantee.
    """
    X = []
    label_names = []

    for root, dirs, files in os.walk(base_path):
        dirs.sort()  # in place, so os.walk descends in this order
        label = os.path.basename(root)
        for file in sorted(files):
            ext = os.path.splitext(file)[-1].lower()
            if ext not in file_exts:
                continue
            full_path = os.path.join(root, file)
            try:
                _, latent = transform_fn(full_path)  # latent: (18 x 512,)
                latent_np = latent.to('cpu').numpy()
                reduced = reduce_latent_dim(latent_np, projection_matrix)
            except Exception as e:
                # Best effort: one unreadable image must not abort the whole load.
                print(f"Skipping {full_path}: {e}")
                continue
            X.append(reduced)
            label_names.append(label)

    le = LabelEncoder()
    y_encoded = le.fit_transform(label_names)

    return np.array(X), np.array(y_encoded), le

# Dataset root on the mounted Drive; the class names shown below come from
# the folder names found under it.
DATASET_PATH = "/content/drive/MyDrive/Dataset"
projection_matrix = get_random_projection()
# Encodes every image with `process`, so this cell is slow.
X, y, label_encoder = load_image_latent_dataset(DATASET_PATH, transform_fn=process, projection_matrix=projection_matrix)


  img = PIL.Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB')


In [None]:
# Sanity check: one row of X and one entry of y per successfully encoded image.
X.shape,y.shape

((499, 512), (499,))

In [None]:
# Class names known to the encoder; the position in this array is the integer label.
label_encoder.classes_

array(['African', 'Asian', 'Caucasian', 'Hispanic', 'Middle Eastern'],
      dtype='<U14')

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

In [None]:
def load_data():
    """Load the projected latents and integer labels found under ``DATASET_PATH``.

    Uses the globals ``process`` and ``projection_matrix``; the label encoder
    returned by the loader is discarded. Returns ``(X, y)``.
    """
    features, labels, _ = load_image_latent_dataset(
        DATASET_PATH,
        transform_fn=process,
        projection_matrix=projection_matrix,
    )
    print(f"X and Y shape: {features.shape, labels.shape}")
    return features, labels

# Call the function and get data
# NOTE: this walks DATASET_PATH and encodes every image again, then rebinds the global X and y.
X, y= load_data()

print(X[:2])  # first two feature vectors
print(y[:2])  # first two labels

  img = PIL.Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB')


X and Y shape: ((499, 512), (499,))
[[-3.22221695  2.43548935 -2.35343109 ... -2.02379906 -0.98668709
   0.53092986]
 [-0.41097158  0.43074532  2.73369143 ...  0.49759565  0.28030269
   0.18811426]]
[2 2]


In [None]:
# Step 2: Split data
def split_data(X, y, val_size=0.2, random_state=42):
    """
    Split data into train and validation sets.

    :param X: feature matrix
    :param y: labels, one per row of X
    :param val_size: share (or absolute number) of samples held out for
        validation; forwarded as ``test_size`` to ``train_test_split``.
        It was previously ignored in favour of a hard-coded 0.2.
    :param random_state: seed for the shuffle, so the split is reproducible
    :return: ``(X_train, X_val, y_train, y_val)``
    """
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=val_size, random_state=random_state)
    return X_train, X_val, y_train, y_val


# Hold out part of the data for validation; the resulting sizes are printed below.
X_train, X_val, y_train, y_val = split_data(X, y, val_size=0.2)

print(f"Train shape: {X_train.shape}, Val shape: {X_val.shape}")

Train shape: (399, 512), Val shape: (100, 512)


In [None]:
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

def fit_knn_without_pca(X_train, y_train, X_val, y_val, n_neighbors=20):
    """
    Pick the best K for a KNN classifier and return the fitted model.

    A classifier is fitted for every K from 1 up to ``n_neighbors`` (capped
    at the number of training samples, since K cannot exceed it) and scored
    on the validation data. The first K with the lowest error rate wins.

    :param X_train: training features
    :param y_train: training labels
    :param X_val: validation features, in the same feature space as X_train
    :param y_val: validation labels
    :param n_neighbors: largest K to try
    :return: the fitted ``KNeighborsClassifier`` with the best K
    :raises ValueError: if there is no K to try (empty training set or
        ``n_neighbors`` < 1)
    """
    max_k = min(n_neighbors, len(X_train))
    if max_k < 1:
        raise ValueError("Need at least one training sample and n_neighbors >= 1")

    best_knn = None
    best_error = None
    for k in range(1, max_k + 1):
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        error = np.mean(knn.predict(X_val) != y_val)
        # Strict comparison keeps the smallest K on ties. The winner is kept
        # as fitted here instead of being fitted a second time afterwards.
        if best_error is None or error < best_error:
            best_knn, best_error = knn, error

    # Return model with best K
    best_k = best_knn.n_neighbors
    print(f"Best K: {best_k} with error rate {best_error:.4f}")
    return best_knn
# Baseline: KNN on the 512-D projected latents, K chosen on the validation set.
knn_model_without_pca=fit_knn_without_pca(X_train, y_train, X_val, y_val, n_neighbors=20)

Best K: 2 with error rate 0.5000


In [None]:
# Step 3a: Fit PCA on training data
def fit_pca(X_train, variance_threshold=0.99):
    """
    Fit a PCA that keeps enough components to explain ``variance_threshold``
    of the variance (99% by default) and print a short summary.

    Returns ``(pca, X_train_pca)``: the fitted PCA object and the
    transformed training data.
    """
    reducer = PCA(n_components=variance_threshold, svd_solver='full')
    projected = reducer.fit_transform(X_train)
    retained = reducer.explained_variance_ratio_.sum()
    print(f"Original shape: {X_train.shape}")
    print(f"Reduced shape: {projected.shape}")
    print(f"Explained variance ratio sum: {retained:.4f}")
    return reducer, projected
# Keep the results under their own names: assigning them to `fit_pca` replaced
# the function with a tuple, so any later call to fit_pca(...) raised TypeError.
pca, X_train_pca = fit_pca(X_train)

Original shape: (399, 512)
Reduced shape: (399, 254)
Explained variance ratio sum: 0.9902


In [None]:
# Step 3b: Fit KNN classifier
def fit_knn_with_pca(X_train_pca, y_train, X_val, y_val, n_neighbors=20):
    """
    Fit KNN classifier on PCA-reduced data.
    Return the fitted model.

    ``X_val`` must already be transformed with the same PCA as
    ``X_train_pca``; raw validation features have a different number of
    columns than the PCA-reduced training data.

    The K search itself does not depend on PCA, so it is delegated to
    ``fit_knn_without_pca`` rather than kept as a second copy of that loop.
    """
    return fit_knn_without_pca(X_train_pca, y_train, X_val, y_val, n_neighbors=n_neighbors)
# This cell previously re-ran fit_knn_without_pca on the raw features, so the
# PCA variant was never exercised. The PCA is fitted here so the cell does not
# depend on names left behind by earlier cells.
_pca_step3b = PCA(n_components=0.99, svd_solver='full').fit(X_train)
knn_model_with_pca = fit_knn_with_pca(
    _pca_step3b.transform(X_train), y_train,
    _pca_step3b.transform(X_val), y_val,
    n_neighbors=20,
)

Best K: 2 with error rate 0.5000


In [None]:
# Step 4: Evaluate model on validation set
def evaluate(models, X_val, y_val):
    """
    Score every model in ``models`` on the same validation data and return
    the accuracies as a list, in the order of ``models``.
    """
    return [candidate.score(X_val, y_val) for candidate in models]



In [None]:
# Step 5: Compare both approaches and pick the better one
def compare_models(X_train, y_train, X_val, y_val):
    """
    Fit KNN on the raw features and on PCA-reduced features and return the
    variant with the higher validation accuracy.

    :return: ``(best_model, pca)`` where ``pca`` is the fitted PCA when the
        PCA variant won and ``None`` otherwise. On a tie the variant without
        PCA is returned.

    Note: K is selected on the same validation set the accuracies are
    computed on, so the reported numbers are optimistic.
    """
    # Fit KNN without PCA
    knn_without_pca = fit_knn_without_pca(X_train, y_train, X_val, y_val, n_neighbors=20)

    # Fit PCA and transform validation data
    pca, X_train_pca = fit_pca(X_train)
    X_val_pca = pca.transform(X_val)

    # Fit KNN with PCA
    knn_with_pca = fit_knn_with_pca(X_train_pca, y_train, X_val_pca, y_val, n_neighbors=20)

    models = [knn_without_pca, knn_with_pca]
    val_data = [X_val, X_val_pca]
    # Each model is scored on its own feature space. The data must be passed
    # as-is: wrapping it in a list handed the model a 3-D input.
    val_accuracies = [evaluate([model], data, y_val)[0] for model, data in zip(models, val_data)]

    print(f"Validation accuracy without PCA: {val_accuracies[0]:.4f}")
    print(f"Validation accuracy with PCA: {val_accuracies[1]:.4f}")

    best_model_idx = int(np.argmax(val_accuracies))
    best_model = models[best_model_idx]
    return best_model, (pca if best_model_idx == 1 else None)

# best_pca is None when the model trained without PCA was selected.
best_model, best_pca = compare_models(X_train, y_train, X_val, y_val)

Best K: 2 with error rate 0.5000
Original shape: (399, 512)
Reduced shape: (399, 254)
Explained variance ratio sum: 0.9902
Best K: 2 with error rate 0.5000
Validation accuracy without PCA: 0.5000
Validation accuracy with PCA: 0.5000


In [None]:
def infer(image_path, transform_fn, projection_matrix, model, label_encoder, pca=None):
    """
    Classify a single new image.

    The image is encoded with ``transform_fn``, the latent is projected with
    ``projection_matrix``, optionally passed through the fitted ``pca``, and
    classified by ``model``.

    Returns ``(predicted_label, predicted_class)``: the label decoded by
    ``label_encoder`` and the encoded class predicted by the model.
    """
    # Encode the image; only the latent part of the result is needed.
    _, latent = transform_fn(image_path)

    # Move to CPU / numpy, project, and shape as a single-sample batch.
    projected = reduce_latent_dim(latent.cpu().numpy(), projection_matrix)
    features = projected.reshape(1, -1)

    # Apply the PCA only when one was used to train the model.
    if pca is not None:
        features = pca.transform(features)

    predicted_class = model.predict(features)
    predicted_label = label_encoder.inverse_transform(predicted_class)

    return predicted_label[0], predicted_class[0]

In [None]:
# Example image to classify; the file has to exist at this path in the Colab runtime.
image_path = "/content/african.jpg"

# Use the best_model and best_pca returned by compare_models
predicted_label_with_best_model, predicted_class_with_best_model = infer(image_path, transform_fn=process, projection_matrix=projection_matrix, model=best_model, label_encoder=label_encoder, pca=best_pca)

print(f"Predicted label with the best model: {predicted_label_with_best_model}")
print(f"Predicted class (encoded) with the best model: {predicted_class_with_best_model}")

Predicted label with the best model: African
Predicted class (encoded) with the best model: 0
