### Image Reconstruction Using AutoEncoder

### Import Section

In [12]:
import numpy as np
import cv2
import pandas as pd
import os

### Variable Definitions

In [9]:
# Attribute table; load_lfw_dataset reads it as tab-separated text.
ATTRS_NAME = "lfw_attributes.txt"

# Archive that load_lfw_dataset opens by default (use_raw=False).
IMAGES_NAME = "lfw-deepfunneled.tgz"

# Archive that load_lfw_dataset opens instead when use_raw=True.
RAW_IMAGES_NAME = "lfw.tgz"

### Decoding image from raw bytes

Decoding takes two OpenCV calls:

1. Decode the raw (still compressed) byte buffer into an image array with `cv2.imdecode`
2. Convert the channel order from BGR to RGB with `cv2.cvtColor`

In [10]:
def decode_image_from_raw_bytes(raw_bytes):
    """Decode an encoded image held in memory and return it in RGB order.

    raw_bytes is the encoded file content (for example the bytes of a .jpg),
    not an already-decoded pixel matrix.
    """
    # imdecode wants the encoded payload as a flat uint8 array.
    encoded = np.asarray(bytearray(raw_bytes), dtype=np.uint8)
    # IMREAD_COLOR is the named form of flag value 1: decode as 3-channel colour.
    bgr_pixels = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    # OpenCV decodes colour images as BGR; reorder the channels to RGB.
    return cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)

### Loading the LFW dataset and decoding each image with the function above

In [13]:
def load_lfw_dataset(use_raw=False, dx=80, dy=80, dimx=45, dimy=45):
    """Load cropped, resized LFW images together with their attribute rows.

    Only images whose (person, imagenum) pair appears in the attributes file
    are kept.

    Args:
        use_raw: open RAW_IMAGES_NAME instead of IMAGES_NAME when true.
        dx: number of pixel columns trimmed from the left and from the right.
        dy: number of pixel rows trimmed from the top and from the bottom.
        dimx: width passed to cv2.resize.
        dimy: height passed to cv2.resize.

    Returns:
        A tuple (all_images, all_attrs): all_images is a uint8 array holding
        the kept images stacked along a new first axis, and all_attrs is a
        DataFrame with the matching attribute rows, without the "person" and
        "imagenum" columns.

    Raises:
        ValueError: if no image in the archive matches the attributes file.
    """
    # Imported locally: the notebook's import cell does not provide these.
    import tarfile

    try:
        # tqdm.auto works in notebooks and in a plain console, and replaces
        # the deprecated tqdm.tqdm_notebook.
        from tqdm.auto import tqdm as progress
    except ImportError:
        # The progress bar is cosmetic; carry on without it.
        def progress(iterable):
            return iterable

    # Reading the attributes from file
    df_attrs = pd.read_csv(ATTRS_NAME, sep='\t', skiprows=1)
    # Shift the header one position left: the first column name and the last
    # column's values are dropped (presumably the header line carries an extra
    # leading token -- confirm against the attributes file).
    df_attrs = pd.DataFrame(df_attrs.iloc[:, :-1].values, columns=df_attrs.columns[1:])
    imgs_with_attrs = set(map(tuple, df_attrs[["person", "imagenum"]].values))

    # Reading images
    all_images = []
    image_ids = []

    archive_name = RAW_IMAGES_NAME if use_raw else IMAGES_NAME
    with tarfile.open(archive_name) as archive:
        for member in progress(archive.getmembers()):
            # Only process image files from the compressed data
            if not (member.isfile() and member.name.endswith(".jpg")):
                continue

            # Parse "<Person_Name>_<number>.jpg" into the person and photo number
            fname = os.path.split(member.name)[-1]
            fname_splitted = fname[:-4].replace('_', ' ').split()
            person_id = ' '.join(fname_splitted[:-1])
            photo_number = int(fname_splitted[-1])

            # Check membership before decoding so that images without
            # attributes are never decoded or resized.
            if (person_id, photo_number) not in imgs_with_attrs:
                continue

            img = decode_image_from_raw_bytes(archive.extractfile(member).read())

            # Crop only faces and resize it. Explicit end indices are used
            # because img[dy:-dy] would be empty for dy == 0 (-0 is 0).
            height, width = img.shape[:2]
            img = img[dy:height - dy, dx:width - dx]
            img = cv2.resize(img, (dimx, dimy))

            all_images.append(img)
            image_ids.append({'person': person_id, 'imagenum': photo_number})

    if not all_images:
        # np.stack would raise a less helpful ValueError on an empty list.
        raise ValueError(
            "no images in %r matched the attributes in %r" % (archive_name, ATTRS_NAME)
        )

    image_ids = pd.DataFrame(image_ids)
    all_images = np.stack(all_images).astype('uint8')

    # Preserve image_ids order: image_ids is the left side of the merge, so
    # attribute rows line up with all_images.
    all_attrs = image_ids.merge(df_attrs, on=['person', 'imagenum']).drop(["person", "imagenum"], axis=1)

    return all_images, all_attrs