In [None]:
# Colab-only setup: mount Google Drive at /content/drive so that later cells
# can read files stored under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import csv
import glob
import imageio.v3 as iio
import matplotlib
import numpy as np
import os
import pandas as pd
import pickle

In [None]:
# Root folder of the dataset on the mounted Google Drive.
ROOT_FILE_PATH = "/content/drive/My Drive/2023-Machine-Learning-Dataset"

# Per-subject folder names found under ROOT_FILE_PATH.
SUBJ_LIST = [
  "subj01", "subj02", "subj03", "subj04",
  "subj05", "subj06", "subj07", "subj08",
]

# Area tags searched for inside ROI mask file names (see ReadMaskFile).
AREAS = [
  "prf-visualrois",
  "floc-bodies",
  "floc-faces",
  "floc-places",
  "floc-words",
  "streams",
  "all-vertices",
]

def ReadNpyFile(folderdir):
  """Load every file matching ``<folderdir>/*.npy`` into a dictionary.

  Args:
    folderdir: Path of the folder to scan (not recursive).

  Returns:
    dict mapping each file name without its extension to the object returned
    by ``np.load``. Empty when nothing matches.
  """
  npy_paths = glob.glob(folderdir + "/*.npy")
  # allow_pickle = True lets np.load unpickle object arrays; only use on trusted files.
  return {
    os.path.splitext(npy_path)[0].rsplit("/", 1)[-1]: np.load(npy_path, allow_pickle = True)
    for npy_path in npy_paths
  }

def ReadFmriFile(folderdir):
  """Load the ``.npy`` files of a folder, keyed by the first character of each file name.

  Args:
    folderdir: Path of the folder to scan (not recursive).

  Returns:
    dict mapping the first character of each file name to the loaded array.
    Files sharing a first character overwrite each other.
    Presumably the keys end up as 'l' / 'r' for hemisphere files - TODO confirm
    against the dataset's naming.
  """
  loaded = {}
  pattern = folderdir + "/*.npy"
  for npy_path in glob.glob(pattern):
    stem, _ext = os.path.splitext(npy_path)
    name = stem.rsplit("/", 1)[-1]
    # allow_pickle = True lets np.load unpickle object arrays; only use on trusted files.
    loaded[name[0]] = np.load(npy_path, allow_pickle = True)
  return loaded

def ReadMaskFile(folderdir, areas = None):
  """Load the ROI mask ``.npy`` files of a folder, grouped by area.

  Each file name is searched for one of the area tags. The first tag found
  becomes the outer key; the inner key is the file name with that tag (and
  the underscores next to it) removed, e.g. ``"mapping_floc-faces"`` becomes
  ``"mapping"`` under ``"floc-faces"``.

  Args:
    folderdir: Path of the folder to scan (not recursive).
    areas: Iterable of area tags to look for. Defaults to the module-level
      ``AREAS`` list.

  Returns:
    dict(area -> dict(inner key -> content)). Content is the loaded ndarray,
    except for inner keys starting with "m", where the 0-d object array is
    unwrapped with ``.item()``.

  Files whose name contains none of the tags are skipped with a warning.
  Previously they were silently stored under the last tag of the list.
  """
  import warnings

  if areas is None:
    areas = AREAS
  retdic = {}
  for path in glob.glob(folderdir + "/*.npy"):
    filename = os.path.basename(os.path.splitext(path)[0])
    for area in areas:
      if area in filename:
        # Drop the area tag together with the underscores that bordered it.
        filename = "".join(part.strip("_") for part in filename.split(area))
        break
    else:
      # No tag matched: `area` would otherwise still hold the last tag tried.
      warnings.warn(f"ReadMaskFile: skipping '{path}' (no known area tag in its name)")
      continue
    # SECURITY: allow_pickle = True is needed for the pickled mapping files, but
    # unpickling can execute arbitrary code - only load trusted files.
    content = np.load(path, allow_pickle = True)
    # startswith() also copes with an empty remainder (file named exactly like the tag).
    if filename.startswith("m"):
      content = content.item()
    retdic.setdefault(area, {})[filename] = content
  return retdic

def FetchPngFile(folderdir, index, mode = "rgb"):
  """Read ``<folderdir>/<index>.png`` and return it in the requested color mode.

  Args:
    folderdir: Folder containing the PNG files.
    index: Value formatted into the file name ``f"{index}.png"``.
    mode: "rgb" returns the image as read; "hsv" returns the result of
      ``matplotlib.colors.rgb_to_hsv`` (integer images are rescaled to
      [0, 1] first); "gray" returns the weighted channel sum
      ``0.299 R + 0.587 G + 0.114 B``.

  Returns:
    ndarray holding the image in the requested mode.

  Raises:
    ValueError: if ``mode`` is not one of "rgb", "hsv", "gray" (previously
      the function silently returned None).
  """
  # Import the submodule explicitly rather than relying on `import matplotlib`
  # having loaded it as a side effect.
  import matplotlib.colors

  if mode not in ("rgb", "hsv", "gray"):
    raise ValueError(f"mode must be 'rgb', 'hsv' or 'gray', got {mode!r}")

  img = iio.imread(os.path.join(folderdir, f'{index}.png'))
  if mode == "rgb":
    return img
  if mode == "hsv":
    # rgb_to_hsv is documented to require values in [0, 1]; integer-typed PNG
    # data (e.g. uint8 in 0..255) is rescaled by its dtype maximum first.
    if np.issubdtype(img.dtype, np.integer):
      img = img / np.iinfo(img.dtype).max
    return matplotlib.colors.rgb_to_hsv(img)
  # mode == "gray": weights are applied along the last (channel) axis.
  return np.dot(img, [0.299, 0.587, 0.114])

In [None]:
class ReadObject:
  """Holds the data of one subject: fMRI responses, ROI masks and the image-info table.

  Call ``load`` first; ``get_image_data`` and ``sample_data`` read the state it sets.
  """
  subject = None      # string: subject whose masks / image table are currently loaded
  fmri = None         # dict(string -> ndarray), as returned by ReadFmriFile
  masks = None        # dict(area -> dict(file key -> content)), as returned by ReadMaskFile
  image_data = None   # pandas DataFrame read from "<subject>_infos_train.csv"

  def load(self, subject, mode = "train"):
    """Load the data of ``subject`` from ROOT_FILE_PATH.

    Masks and the image-info table are only re-read when ``subject`` differs
    from the one already loaded; the fMRI data is re-read on every call.

    Args:
      subject: Subject folder name, e.g. an entry of SUBJ_LIST.
      mode: "train" reads ``training_split/training_fmri``; "test" reads
        ``test_split/test_fmri``.

    Raises:
      ValueError: if ``mode`` is neither "train" nor "test" (previously the
        call silently kept whatever fMRI data was loaded before).

    NOTE(review): the image-info table always comes from
    ``<subject>_infos_train.csv``, also in "test" mode - confirm this is intended.
    """
    if mode not in ("train", "test"):
      raise ValueError(f"mode must be 'train' or 'test', got {mode!r}")

    if subject != self.subject:
      masks = ReadMaskFile(os.path.join(ROOT_FILE_PATH, subject, "roi_masks"))
      image_data = pd.read_csv(os.path.join(ROOT_FILE_PATH, "image_infos", f"{subject}_infos_train.csv"))
      # Commit only after both reads succeeded, so a failed load cannot leave
      # self.subject pointing at a subject whose data was never loaded.
      self.masks = masks
      self.image_data = image_data
      self.subject = subject

    if mode == "train":
      self.fmri = ReadFmriFile(os.path.join(ROOT_FILE_PATH, subject, "training_split", "training_fmri"))
    else:
      self.fmri = ReadFmriFile(os.path.join(ROOT_FILE_PATH, subject, "test_split", "test_fmri"))

  def get_image_data(self, return_type = "array"):
    """Return the image-info table of the loaded subject.

    Args:
      return_type: "array" returns a NumPy array of the table without the
        'cocoId' and 'cocoSplit' columns; "dataframe" returns the stored
        DataFrame itself (not a copy).

    Raises:
      ValueError: if ``return_type`` is not one of the two values above
        (previously the method silently returned None).
    """
    if return_type == "array":
      return self.image_data.drop(columns = ['cocoId', 'cocoSplit']).to_numpy()
    if return_type == "dataframe":
      return self.image_data
    raise ValueError(f"return_type must be 'array' or 'dataframe', got {return_type!r}")

  def sample_data(self, index_list, mode = "hsv"):
    """Return images, fMRI rows and image-info rows for the given sample indices.

    The indices are de-duplicated and sorted, and the same index set is used
    for all four outputs so that their rows line up.

    Args:
      index_list: Non-empty collection of integer row indices.
      mode: Color mode forwarded to FetchPngFile ("rgb", "hsv" or "gray").

    Returns:
      Tuple ``(images, fmri_l, fmri_r, df)``: the stacked images, the selected
      rows of ``self.fmri['l']`` and ``self.fmri['r']``, and the selected rows
      of ``get_image_data("array")``.

    Raises:
      IndexError: if an index is negative or not smaller than the number of
        rows of ``self.fmri['l']``.

    NOTE(review): images are always read from ``training_split/training_images``,
    whatever mode ``load`` was called with - confirm this is intended.
    """
    # Use the real row count instead of a hard-coded 5000.
    n_rows = len(self.fmri['l'])
    indices = np.unique(np.asarray(index_list, dtype = int))
    if indices.size and (indices[0] < 0 or indices[-1] >= n_rows):
      raise IndexError(f"sample indices must lie in [0, {n_rows}); got min {indices[0]}, max {indices[-1]}")

    image_dir = os.path.join(ROOT_FILE_PATH, self.subject, "training_split", "training_images")
    # .tolist() yields plain Python ints for the "<index>.png" file names.
    images = np.stack([FetchPngFile(image_dir, i, mode = mode) for i in indices.tolist()])
    fmri_l = self.fmri['l'][indices]
    fmri_r = self.fmri['r'][indices]
    df = self.get_image_data(return_type = "array")[indices]
    return images, fmri_l, fmri_r, df
    

In [None]:
# Smoke test: load the first subject (default "train" mode) and fetch samples
# 0-4 with grayscale images. The trailing semicolon on the last line keeps the
# notebook from echoing the returned tuple.
DATA = ReadObject(); 
DATA.load(SUBJ_LIST[0]);
DATA.sample_data([0, 1, 2, 3, 4], mode = "gray");