In [6]:
import pandas as pd
import numpy as np

import sys
import os 
from datetime import datetime
import random as rnd
import itertools as it

import warnings
warnings.filterwarnings('ignore')

# Making pics info DataFrame

In [7]:
def get_basename(path):
    """Return the file name of `path` without its directories and without its last extension.

    Example: dataset/lfw_funneled/Abel_Pacheco/Abel_Pacheco_0001.JPG -> Abel_Pacheco_0001
    """
    file_name = os.path.basename(path)
    stem, _extension = os.path.splitext(file_name)
    return stem

In [8]:
path = 'dataset/lfw_funneled'

# Whole pairs file as one string.
with open(f'{path}/pairs.txt', 'r') as pairs_file:
    pairs = pairs_file.read()

# The first line is skipped; the next 300 lines are split on tabs into (name, pic1, pic2).
pair_rows = [line.split('\t') for line in pairs.split('\n')[1:301]]
df = pd.DataFrame(pair_rows, columns=['name', 'pic1', 'pic2'])

# Long format: one row per (name, pic), ordered and de-duplicated.
df = df.melt(id_vars='name', value_vars=['pic1', 'pic2'], var_name='picNum', value_name='pic')
df = df.drop(columns='picNum')
df = df.sort_values(by=['name', 'pic'])
df = df.drop_duplicates()
df = df.reset_index(drop=True)

# Picture numbers are zero-padded to 4 characters to build <name>_<NNNN>.jpg.
df['pic'] = [pic_num.zfill(4) for pic_num in df['pic']]
df['path_x1'] = f'{path}/' + df['name'] + '/' + df['name'] + '_' + df['pic'] + '.jpg'
df.to_csv('pics_df.csv')
df.head()

Unnamed: 0,name,pic,path_x1
0,Abel_Pacheco,1,dataset/lfw_funneled/Abel_Pacheco/Abel_Pacheco...
1,Abel_Pacheco,4,dataset/lfw_funneled/Abel_Pacheco/Abel_Pacheco...
2,Akhmed_Zakayev,1,dataset/lfw_funneled/Akhmed_Zakayev/Akhmed_Zak...
3,Akhmed_Zakayev,2,dataset/lfw_funneled/Akhmed_Zakayev/Akhmed_Zak...
4,Akhmed_Zakayev,3,dataset/lfw_funneled/Akhmed_Zakayev/Akhmed_Zak...


In [9]:
# Original (x1) picture paths, and the file names used for their upscaled versions.
pics_to_scale_path = df['path_x1'].values
pics_to_scale = [get_basename(src_path) + '_result.jpg' for src_path in pics_to_scale_path]
print(f'There are {len(pics_to_scale)} unique pics to be scaled')

There are 500 unique pics to be scaled


# Scaling with DCSCN and adding path info to pics DataFrame

In [None]:
# Тут надо самому ставить нужный скейл: 2 3 или 4. Так как памяти нехватает + настроить сохранение
# в нужную папку сложна (не хочется заморачиваться), то надо перезапускать и руками переименовывать папку.

scale = 4
scaled_pics = os.listdir(f'dataset/output_DCSCN_x{scale}')
for pic, pic_path in zip(pics_to_scale, pics_to_scale_path):
    if pic in scaled_pics:
        continue
    abc = f'--file={pic_path} --scale={scale}'
    %run sr.py {abc}

In [None]:
# Upscaled pictures are expected as <basename of the x1 picture>_result.jpg
# inside dataset/output_DCSCN_x<scale>/.
result_names = df['path_x1'].apply(get_basename) + '_result.jpg'
for scale in (2, 3, 4):
    df[f'path_x{scale}'] = f'dataset/output_DCSCN_x{scale}/' + result_names
df.head()

In [None]:
# Write df to pics_df.csv again so the file reflects df's current columns.
df.to_csv('pics_df.csv')

# Making DataFrame for classification

In [None]:
# Reload the picture table from disk; the first CSV column becomes the index.
df = pd.read_csv('pics_df.csv', index_col=0)
df.head()

#### Get all possible pairs of photos for each person

In [None]:
# One [name, pic1, pic2] row for every unordered pair of pictures that share a name.
classification_data_sim = [
    [name, *pic_pair]
    for name, name_df in df[['name', 'pic']].groupby('name')
    for pic_pair in it.combinations(name_df['pic'].values, 2)
]
classification_df_sim = pd.DataFrame(classification_data_sim, columns=['name', 'pic1', 'pic2'])
classification_df_sim.head()

In [None]:
# Attach df's columns to each pair twice: matched on pic1 first, then on pic2.
# Columns that clash on the second merge get the '_1' / '_2' suffixes.
for pic_num in (1, 2):
    classification_df_sim = classification_df_sim.merge(
        df,
        how='left',
        left_on=['name', f'pic{pic_num}'],
        right_on=['name', 'pic'],
        suffixes=('_1', '_2'),
    )
# The merge keys copied over from df are redundant next to pic1 / pic2.
classification_df_sim = classification_df_sim.drop(columns=['pic_1', 'pic_2'])
classification_df_sim.head()

In [None]:
# Persist the same-person pairs and report how many there are.
classification_df_sim.to_csv('classification_similar_people_pics_df.csv')
n_sim_pairs = len(classification_df_sim)
print(f"We've got {n_sim_pairs} pairs of pics with similar persons")

#### And the same number of pairs for different people

In [None]:
# Fixed seed: without it a different set of negative pairs is drawn on every run, and every
# number derived from them changes with it.
NONSIM_SAMPLE_SEED = 42

# Every unordered pair of pictures whose names differ, flattened to [name1, pic1, name2, pic2].
non_sim_people_pics = [
    first + second
    for first, second in it.combinations(df[['name', 'pic']].values.tolist(), 2)
    if first[0] != second[0]
]
# Draw as many different-name pairs as there are same-name pairs, using a dedicated generator
# so the global `random` state is left untouched.
non_sim_people_pics = rnd.Random(NONSIM_SAMPLE_SEED).sample(non_sim_people_pics,
                                                            classification_df_sim.shape[0])
classification_df_nonsim = pd.DataFrame(non_sim_people_pics, columns=['name1', 'pic1', 'name2', 'pic2'])
classification_df_nonsim.head()

In [None]:
# Attach df's columns to each pair twice: matched on (name1, pic1) first, then on (name2, pic2).
# Columns that clash on the second merge get the '_1' / '_2' suffixes.
for pic_num in (1, 2):
    classification_df_nonsim = classification_df_nonsim.merge(
        df,
        how='left',
        left_on=[f'name{pic_num}', f'pic{pic_num}'],
        right_on=['name', 'pic'],
        suffixes=('_1', '_2'),
    )
# The merge keys copied over from df are redundant next to name1/pic1 and name2/pic2.
classification_df_nonsim = classification_df_nonsim.drop(columns=['name_1', 'pic_1', 'name_2', 'pic_2'])
classification_df_nonsim.head()

In [None]:
# Persist the different-person pairs and report how many there are.
classification_df_nonsim.to_csv('classification_non-similar_people_pics_df.csv')
n_nonsim_pairs = len(classification_df_nonsim)
print(f'And {n_nonsim_pairs} pairs of pics with non-similar persons')

# Classification DCSCN

In [None]:
from recognizer import Recognizer
from skimage.io import imread

# Cut-off used by recongize(): a pair is flagged True when the value returned by
# recognizer.get_best_similarity is strictly below this number.
# NOTE(review): "below the threshold => True" suggests the score behaves like a distance — confirm against Recognizer.
THRESHOLD = 0.9

In [None]:
# One Recognizer instance shared by all recongize() calls.
recognizer = Recognizer()

def recongize(row):
    """Compare the two pictures referenced by `row` and return the thresholded verdict.

    Parameters
    ----------
    row : pandas.Series
        Its first two positions hold the paths of the images to compare.

    Returns
    -------
    The result of `similarity < THRESHOLD`, where `similarity` is whatever
    `recognizer.get_best_similarity` returns for the two loaded images.
    """
    # Positional access via .iloc: Series.ix was removed in pandas 1.0 and raises AttributeError.
    photo1 = imread(row.iloc[0])
    photo2 = imread(row.iloc[1])
    similarity = recognizer.get_best_similarity(photo1, photo2)
    return similarity < THRESHOLD

### Similar and non-similar people

In [None]:
# Reload both pair tables from disk; the first CSV column becomes the index.
data_similar, data_nonsimilar = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'classification_similar_people_pics_df.csv',
        'classification_non-similar_people_pics_df.csv',
    )
)

In [None]:
%%time
print(f'Started at {datetime.now()}')
for data in [data_similar, data_nonsimilar]:
    for scale in [1, 2, 3, 4]:
        start = datetime.now()
        data[f'rec_x{scale}'] = data[[f'path_x{scale}_1', f'path_x{scale}_2']].apply(recongize, axis=1)
        print(f'Scale x{scale} is done in {datetime.now() - start}')

In [None]:
# Write both pair tables back to the CSV files they were loaded from.
data_similar.to_csv('classification_similar_people_pics_df.csv')
data_nonsimilar.to_csv('classification_non-similar_people_pics_df.csv')

In [None]:
def counts_pct(col):
    """Return the share of each distinct value in `col` as a percentage rounded to 2 decimals."""
    shares = col.value_counts(normalize=True)
    return (shares * 100).round(2)

def _accuracy_pct(recs_frame):
    """Return, per column of `recs_frame`, the percentage of True values (rounded to 2 decimals).

    The percentage table is reindexed to always contain a True row, so a column without any
    True yields 0.0 instead of NaN, and a frame without any True no longer raises KeyError
    on `.loc[True]`.
    """
    pct_table = recs_frame.apply(counts_pct).reindex([True, False]).fillna(0.0)
    return list(pct_table.loc[True].values)


recs = ['rec_x1', 'rec_x2', 'rec_x3', 'rec_x4']
recs_sim = data_similar[recs]
# Flags of the non-similar pairs are inverted so that, as for the similar pairs,
# True is the value counted towards 'accuracy'.
recs_nonsim = data_nonsimilar[recs].replace({True: False, False: True})

# Three groups (similar, non-similar, both together) x four scales.
classification_results = pd.DataFrame()
classification_results['person_type'] = ['Similar'] * 4 + ['Non-Similar'] * 4 + ['Mixed'] * 4
classification_results['scale'] = ['x1', 'x2', 'x3', 'x4'] * 3
classification_results['accuracy'] = (_accuracy_pct(recs_sim) +
                                      _accuracy_pct(recs_nonsim) +
                                      _accuracy_pct(pd.concat([recs_sim, recs_nonsim])))

classification_results

In [None]:
# Persist the accuracy table.
classification_results.to_csv('classification_results_DCSCN.csv')

# Scaling with ESRGAN and adding path info to pics DataFrame

In [30]:
import glob
import cv2
import torch
import ESRGAN.architecture as arch

In [31]:
model_path = 'ESRGAN/models/RRDB_ESRGAN_x4.pth' # sys.argv[1]  # models/RRDB_ESRGAN_x4.pth OR models/RRDB_PSNR_x4.pth
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = arch.RRDB_Net(3, 3, 64, 23, gc=32, upscale=4, norm_type=None, act_type='leakyrelu', 
                      mode='CNA', res_scale=1, upsample_mode='upconv')
# map_location='cpu' lets a checkpoint saved from a GPU load on a machine without CUDA;
# the model is moved to `device` afterwards.
model.load_state_dict(torch.load(model_path, map_location='cpu'), strict=True)
model.eval()
# Inference only: no parameter needs gradients.
for param in model.parameters():
    param.requires_grad = False
model = model.to(device)

In [27]:
scale = 4
esrgan_out_dir = f'dataset/output_ESRGAN_x{scale}'
# cv2.imwrite does not create missing folders and only reports failure through its return
# value, so make sure the target folder exists before the (expensive) loop starts.
os.makedirs(esrgan_out_dir, exist_ok=True)

for pic, pic_path in zip(pics_to_scale, pics_to_scale_path):
    img = cv2.imread(pic_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable.
        raise FileNotFoundError(f'Could not read image: {pic_path}')
    img = img * 1.0 / 255
    # Reverse the channel axis (OpenCV loads BGR) and move it to the front.
    img = torch.from_numpy(np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))).float()
    img_LR = img.unsqueeze(0).to(device)

    # Pure inference: no autograd bookkeeping needed.
    with torch.no_grad():
        output = model(img_LR).squeeze().float().cpu().clamp_(0, 1).numpy()
    # Back to OpenCV's layout: channel axis reversed again and moved last, values scaled to 0..255.
    output = np.transpose(output[[2, 1, 0], :, :], (1, 2, 0))
    output = (output * 255.0).round()
    out_path = f'{esrgan_out_dir}/{pic}'
    if not cv2.imwrite(out_path, output):
        raise OSError(f'Could not write image: {out_path}')