Kjør cellen for å sette opp systemet:

In [None]:
!git clone https://github.com/SimeNor/tenk.git

import os
os.chdir("tenk")

!pip install -r requirements.txt

In [None]:
# Extract the archive of cropped images, then delete the uncropped archive.
# `&&` makes the delete run only if unzip succeeded; the shell exit status
# returned by os.system is not checked.
# NOTE(review): the command unpacks cropped.zip but removes uncropped.zip —
# confirm this asymmetry is intended (e.g. to free disk space).
os.system('unzip cropped.zip && rm uncropped.zip')

In [None]:
import pandas as pd
import numpy as np
# Load the image metadata table. Later cells read the columns photo_taken,
# name, face_score, second_face_score, full_path and celeb_id from it.
meta = pd.read_csv("imdb.csv")

In [None]:
# Count photos per celebrity among pictures taken after 2010. The counts are
# used straight from the Series: wrapping value_counts() in
# pd.DataFrame(..., columns=['name']) only works while the counts Series is
# itself called 'name' (pandas < 2.0). Newer pandas names it 'count', which
# yields a frame without the counts and therefore an empty selection.
name_counts = meta.loc[meta["photo_taken"] > 2010, "name"].value_counts()

# Kept for backward compatibility: a single 'name' column holding the counts,
# indexed by celebrity name (index left unnamed, as before).
popDf = name_counts.rename_axis(None).to_frame(name="name")

# Keep rows with face_score > 4 and a missing (NaN) second_face_score,
# restricted to celebrities with more than 30 photos taken after 2010.
popular_names = name_counts[name_counts > 30].index
topCelebs = meta[
    (meta["face_score"] > 4)
    & meta["second_face_score"].isna()
    & meta["name"].isin(popular_names)
]
topCelebs.to_csv("topCelebs30.csv")

In [None]:
import cv2
import math
from matplotlib import pyplot as plt

def display_random_images(metadata, num_images=6, image_dir='imdb_crop', columns=2):
    """Plot a random sample of images in a grid, titled with name and score.

    Args:
        metadata: DataFrame with the columns ``full_path``, ``name`` and
            ``face_score``.
        num_images: Number of rows to sample; capped at ``len(metadata)``.
        image_dir: Directory that the ``full_path`` values are relative to.
        columns: Number of columns in the grid.

    Images that cannot be read are reported on stdout (path, name and score)
    and skipped instead of aborting the whole plot.
    """
    # Never ask pandas for more rows than exist (that raises ValueError).
    count = min(num_images, len(metadata))
    sample = metadata.sample(n=count)

    fig = plt.figure(figsize=(15, 10))
    rows = math.ceil(count / columns)

    cells = zip(sample["full_path"], sample["name"], sample["face_score"])
    for position, (filepath, celeb, score) in enumerate(cells, start=1):
        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising, so test for it explicitly instead of hiding
        # every possible error behind a bare `except`.
        image = cv2.imread(f'{image_dir}/{filepath}')
        if image is None:
            print(filepath, f"{celeb}, {score}")
            continue

        fig.add_subplot(rows, columns, position)
        # The image is converted from BGR to RGB channel order before plotting.
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.title(f"{celeb}, {score}")

In [None]:
# `display_random_images` is defined earlier in this notebook. The dangling
# `from utils import` (no imported names) was a SyntaxError that prevented
# this cell from running at all. If the helper is meant to come from utils.py
# instead, import it by name.
display_random_images(topCelebs, num_images=16)

In [None]:
import shutil
import os

# Copy every selected image into Celebs/<celeb_id>/, one folder per celeb_id.
source_root = "imdb_crop"
target_root = "Celebs"

# Create the root even when there is nothing to copy.
os.makedirs(target_root, exist_ok=True)

for row in topCelebs.itertuples(index=False):
    target_dir = os.path.join(target_root, str(row.celeb_id))
    # exist_ok=True replaces the separate isdir-then-mkdir check with a single
    # call that tolerates an already existing folder.
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy2(os.path.join(source_root, row.full_path), target_dir)

## Init cropping-model and set parameters


In [None]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os

In [None]:
# Root folder of the images; the cropped copies live in data_dir + '_cropped'.
data_dir = 'Celebs'

# Batch size for the data loaders and number of training epochs.
batch_size, epochs = 64, 8

# DataLoader worker processes: none on Windows ('nt'), eight elsewhere.
workers = 8 if os.name != 'nt' else 0

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(f'Running on device: {device}')

In [None]:
# Face detection/cropping model, placed on the selected device.
mtcnn = MTCNN(
    image_size=160,
    margin=5,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=device,
)

### Extract and load cropped images

In [None]:
# Load the raw images from data_dir, resized to 512x512.
dataset = datasets.ImageFolder(data_dir, transform=transforms.Resize((512, 512)))

# Replace each class label with the path the cropped face should be saved to,
# so the loader yields (image, save_path) pairs. The target is built by
# re-rooting the path relative to data_dir; a plain str.replace would rewrite
# every occurrence of the folder name anywhere in the path.
dataset.samples = [
    (path, os.path.join(data_dir + '_cropped', os.path.relpath(path, data_dir)))
    for path, _ in dataset.samples
]

loader = DataLoader(
    dataset,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil,
)

In [None]:

# Run MTCNN over every batch, passing the paired save paths as `save_path`.
total_batches = len(loader)
for batch_no, (images, save_paths) in enumerate(loader, start=1):
    mtcnn(images, save_path=save_paths)
    print(f'\rBatch {batch_no} of {total_batches}', end='')

# Remove mtcnn to reduce GPU memory usage
del mtcnn

In [None]:
# Disabled helper code, kept inside a bare string literal so that it never
# runs: it would zip the Celebs_cropped and Celebs folders and upload both
# archives to the S3 bucket named in BUCKET_NAME.
# NOTE(review): the credentials are blank here; if this is ever re-enabled,
# load them from the environment or the AWS configuration rather than
# hard-coding them in the notebook.
"""
!zip -r cropped.zip Celebs_cropped
!zip -r uncropped.zip Celebs



def upload_to_s3():
    import boto3
    import os

    BUCKET_NAME = 'celebfaces' # replace with your bucket name

    # enter authentication credentials
    s3 = boto3.resource('s3',
                        aws_access_key_id='',
                        aws_secret_access_key='')

    s3.Bucket(BUCKET_NAME).upload_file('cropped.zip', 'cropped.zip')
    s3.Bucket(BUCKET_NAME).upload_file('uncropped.zip', 'uncropped.zip')

upload_to_s3()
"""

### Init ID-model

In [None]:
# Classification network with one output per class folder found by
# ImageFolder, initialised from the 'vggface2' pretrained weights.
resnet = InceptionResnetV1(
    pretrained='vggface2',
    classify=True,
    num_classes=len(dataset.class_to_idx),
)
resnet = resnet.to(device)

In [None]:
# Adam with a learning rate of 1e-3; MultiStepLR milestones at epochs 5 and 10.
optimizer = optim.Adam(params=resnet.parameters(), lr=1e-3)
scheduler = MultiStepLR(optimizer, milestones=[5, 10])

# Preprocessing for the cropped faces:
# float32 array -> tensor -> fixed image standardization.
trans = transforms.Compose(
    [np.float32, transforms.ToTensor(), fixed_image_standardization]
)

# Re-point `dataset` at the cropped faces and apply the training transform.
dataset = datasets.ImageFolder(data_dir + '_cropped', transform=trans)

# Shuffle the sample indices and cut them 80/20 into training and validation.
img_inds = np.arange(len(dataset))
np.random.shuffle(img_inds)
train_inds, val_inds = np.split(img_inds, [int(0.8 * len(img_inds))])

# Both loaders share every setting except the subset of indices they sample.
train_loader, val_loader = (
    DataLoader(
        dataset,
        num_workers=workers,
        batch_size=batch_size,
        sampler=SubsetRandomSampler(indices),
    )
    for indices in (train_inds, val_inds)
)

In [None]:
# Cross-entropy loss for the classifier.
loss_fn = torch.nn.CrossEntropyLoss()

# Per-batch metrics handed to training.pass_epoch as `batch_metrics`.
metrics = dict(fps=training.BatchTimer(), acc=training.accuracy)

### Train ID-model

In [None]:
writer = SummaryWriter()
# Logging counters attached to the writer; presumably consumed by
# training.pass_epoch — confirm against facenet_pytorch.training.
writer.iteration, writer.interval = 0, 1

# Defined starting state even when `epochs` is 0.
resnet.eval()

try:
    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        # Training pass: optimizer and scheduler are supplied.
        resnet.train()
        training.pass_epoch(
            resnet, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

        # Validation pass: no optimizer is given, so nothing is
        # back-propagated; disabling autograd avoids building unused graphs.
        resnet.eval()
        with torch.no_grad():
            training.pass_epoch(
                resnet, loss_fn, val_loader,
                batch_metrics=metrics, show_running=True, device=device,
                writer=writer
            )
finally:
    # Close the writer even if training fails or is interrupted.
    writer.close()

In [None]:
# Load the TensorBoard notebook extension and open it on the `runs` directory.
# NOTE(review): presumably this is where SummaryWriter() wrote the training
# logs — confirm.
%load_ext tensorboard
%tensorboard --logdir runs

In [None]:
def collate_fn(x):
    """Collapse a batch to its first sample (``x[0]``); the rest is dropped."""
    first_sample = x[0]
    return first_sample

# Same folder the training data is read from (data_dir + '_cropped'), loaded
# without a transform so the loader yields the images as stored.
test_dataset = datasets.ImageFolder(data_dir + '_cropped')
# Reverse lookup from class index to class folder name.
test_dataset.idx_to_class = {i: c for c, i in test_dataset.class_to_idx.items()}

# collate_fn keeps only the first sample of every batch, so a loader with
# batch_size=batch_size loads batch_size images per step and throws away all
# but one. Selecting those same indices up front yields the identical
# (image, label) stream without the wasted loading.
# NOTE(review): if every image should be used, iterate `test_dataset` directly
# instead of this subset.
test_loader = DataLoader(
    torch.utils.data.Subset(test_dataset, range(0, len(test_dataset), batch_size)),
    collate_fn=collate_fn,
    num_workers=workers,
)

In [None]:
from torchvision.transforms import functional as F

aligned = []
names = []
for x, y in test_loader:
    # Apply the same float32 -> tensor -> fixed standardization steps the
    # training transform uses; without the standardization the network is fed
    # inputs on a different scale than it was trained on.
    aligned.append(fixed_image_standardization(F.to_tensor(np.float32(x))))
    names.append(test_dataset.idx_to_class[y])

In [None]:
aligned = torch.stack(aligned).to(device)

# Inference only: make sure the model is in eval mode and skip autograd
# bookkeeping, which is never used here and only costs memory.
# NOTE(review): resnet was built with classify=True, so this output is
# presumably class logits rather than face embeddings — confirm, and set
# `resnet.classify = False` first if embeddings are intended.
resnet.eval()
with torch.no_grad():
    embeddings = resnet(aligned).cpu()

In [None]:
# Pairwise Euclidean distances in one vectorised call instead of a Python
# double loop of per-pair .norm() calls. The non-matmul compute mode keeps the
# values in line with the element-wise (e1 - e2).norm() computation.
dists = torch.cdist(
    embeddings, embeddings, compute_mode='donot_use_mm_for_euclid_dist'
).tolist()
resdf = pd.DataFrame(dists, columns=names, index=names)