In [None]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from functools import wraps
from time import time
from PIL import Image
from torchvision import transforms
import torch

# Image preprocessing pipeline applied to every PIL image before it is stored.
# The steps run in order:
#   1. Resize to a fixed (height, width) of (1000, 1500).
#   2. Keep only the central 384x384 window.
#   3. Convert the PIL image to a float tensor (channels first).
#   4. Normalise each of the three channels with mean 0.5 and std 0.5,
#      i.e. (x - 0.5) / 0.5.
# NOTE(review): the fixed resize ignores the input aspect ratio; this presumably
# assumes 3:2 landscape source images -- confirm against the dataset.
preprocess = transforms.Compose(
    [
        transforms.Resize(size=(1000, 1500)),
        transforms.CenterCrop(size=384),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

def timing(f):
    """Decorator that prints how long each call to ``f`` takes.

    The wrapped function behaves exactly like ``f`` (same arguments, same
    return value, metadata preserved through ``functools.wraps``). After
    every successful call one line is printed containing the function name,
    the positional and keyword arguments, and the elapsed time in seconds.
    Nothing is printed when ``f`` raises; the exception simply propagates.

    Parameters
    ----------
    f : callable
        Function to instrument.

    Returns
    -------
    callable
        Wrapper around ``f`` with timing output.
    """
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring intervals; wall-clock time() can jump if the system clock is
    # adjusted mid-call. Imported locally so the block does not depend on a
    # change to the notebook's import cell.
    from time import perf_counter

    @wraps(f)
    def wrap(*args, **kw):
        ts = perf_counter()
        result = f(*args, **kw)
        te = perf_counter()
        print('func:%r args:[%r, %r] took: %2.4f sec' %
              (f.__name__, args, kw, te - ts))
        return result
    return wrap

def data_loader(path = "/Users/nattapolchanpaisit/Desktop/Columbia Gaze Data Set copy/", plot_sample=False):
    """Load and preprocess every ``jpg`` image in the per-person folders under ``path``.

    Only sub-directories of ``path`` whose name starts with ``'00'`` are
    scanned. Each image is opened with PIL, passed through the module-level
    ``preprocess`` pipeline and paired with the two integer angles encoded in
    its file name. Folders and files are visited in sorted order so the
    person/sample indices are the same on every run.

    Parameters
    ----------
    path : str
        Dataset root directory (a trailing slash is optional). The default
        is a machine-specific absolute path kept for backward compatibility.
    plot_sample : bool
        When true, roughly one image in a thousand is displayed (first
        channel only) and its label printed, as a visual sanity check.

    Returns
    -------
    list
        One inner list per person folder; each element of an inner list is
        ``[image_tensor, [yaw, pitch]]``.
    """
    def read_angles(name):
        # The angles are the 4th and 5th '_'-separated fields of the file
        # name. The 4th field carries a one-character suffix that is dropped;
        # the 5th ends in "H.jpg".
        # NOTE(review): the field ending in "H" is returned as `pitch` and the
        # other one as `yaw`. If "H" denotes the horizontal angle these names
        # are swapped relative to the usual convention -- confirm before
        # relying on them. The return order is kept as-is for callers.
        tokens = os.path.basename(name).split("_")[3:]
        yaw = int(tokens[0][:-1])
        pitch = int(tokens[1].replace("H.jpg", ''))
        return yaw, pitch

    files_path = []
    # os.listdir gives entries in arbitrary order; sorting makes the person
    # index stable between runs.
    for each in sorted(os.listdir(path)):
        person_dir = os.path.join(path, each)
        # Skip stray files whose name happens to start with '00'.
        if each[:2] == '00' and os.path.isdir(person_dir):
            files_path.append(sorted(
                f.path for f in os.scandir(person_dir)
                if f.is_file() and f.path[-3:] == 'jpg'
            ))

    # Report the number of images, not the number of person folders.
    n_images = sum(len(person) for person in files_path)
    print(f"{n_images} images found")

    data = []
    count = 0
    for person in files_path:
        temp = []
        for name in person:
            yaw, pitch = read_angles(name)
            # preprocess() must run while the file is still open because PIL
            # loads pixel data lazily.
            with Image.open(name) as image:
                image_matrix = preprocess(image)
            temp.append([image_matrix, [yaw, pitch]])
            if plot_sample:
                num = np.random.randint(1,1000)
                if num > 998:
                    # Show the sample that was just loaded. `data` does not
                    # contain the current person yet, so it cannot be used here.
                    plt.imshow(image_matrix[0,:,:])
                    plt.title("Sample Input")
                    plt.show()
                    print(f"{[yaw, pitch]}")
            # Lightweight progress indicator: every 20th image.
            if count % 20 == 0:
                print(count)
            count += 1
        data.append(temp)
    return data

# Load the dataset from data_loader's default path. This runs when the cell is
# executed and opens and preprocesses every image found, so it can be slow.
data = data_loader()

In [11]:
# Split the nested `data` structure into two parallel per-person lists:
#   images[p][k] -> first element (the image) of sample k of person p
#   gts[p][k]    -> new two-element list with that sample's two label values
# Comprehensions are used instead of a `for _ in range(...)` loop so that `_`,
# which IPython uses for the previous cell's output, is not rebound at
# notebook level.
images = [[sample[0] for sample in person] for person in data]
gts = [[[sample[1][0], sample[1][1]] for sample in person] for person in data]
def save_dataset(start=29, out_dir='/Users/nattapolchanpaisit/Desktop/MLDA/datasets_face_384x384'):
    """Export labels and flattened images, one pair of text files per person.

    Reads the module-level ``images`` and ``gts`` lists. For every person
    index ``i`` from ``start`` up to ``len(images) - 1`` two files are written
    into ``out_dir``: ``data_gt_{i}.csv`` and ``data_image_{i}.csv``.

    Each label and each flattened image is passed to ``np.savetxt`` as a 1-D
    array, so the files contain one number per line (they are not
    comma-separated despite the ``.csv`` extension).

    Parameters
    ----------
    start : int
        First person index to export. The default of 29 preserves the
        original hard-coded behaviour (presumably resuming an earlier,
        partial export -- TODO confirm); pass 0 to export everything.
    out_dir : str
        Destination directory; created if it does not exist. The default is
        the original machine-specific absolute path.

    Raises
    ------
    AssertionError
        If a person's labels are not two-column or the stacked images are
        not of shape ``(N, 3, 384, 384)``.
    """
    os.makedirs(out_dir, exist_ok=True)
    for i in range(start, len(images)):
        gt = np.array(gts[i])
        assert gt.shape[1] == 2, "Incorrect shape for gt"
        image = np.array(torch.stack(images[i]))
        assert image.shape[1:] == (3, 384, 384), "Incorrect image size"
        # Flatten every image to a single row: (N, 3*384*384).
        image = image.reshape(image.shape[0], -1)
        gt_path = os.path.join(out_dir, f'data_gt_{i}.csv')
        image_path = os.path.join(out_dir, f'data_image_{i}.csv')
        with open(gt_path, 'w') as my_file:
            for label in gt:
                np.savetxt(my_file, label)
        with open(image_path, 'w') as my_file:
            for flat_image in image:
                np.savetxt(my_file, flat_image)
        print(f'Array exported to file (data_image_{i}.csv/data_gt_{i}.csv)')
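# Export is disabled by default because it writes large files to disk.
# Uncomment the call below to run it.
# save_dataset()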