In [1]:
import platform
import os
# Resolve the machine-specific data and repository roots from the host OS.
# 'Darwin' is what platform.system() reports on macOS.
_system = platform.system()
if _system == 'Darwin':
    DATA_PATH = "/Users/maltegenschow/Documents/Uni/Thesis/Data.nosync"
    ROOT_PATH = "/Users/maltegenschow/Documents/Uni/Thesis/Thesis"
elif _system == 'Linux':
    DATA_PATH = "/pfs/work7/workspace/scratch/tu_zxmav84-thesis/Data.nosync"
    ROOT_PATH = "/pfs/work7/workspace/scratch/tu_zxmav84-thesis/Thesis"
else:
    # Fail here with a clear message instead of a NameError on DATA_PATH
    # in a later cell when the notebook runs on an unconfigured platform.
    raise RuntimeError(
        f"Unsupported platform {_system!r}: add DATA_PATH and ROOT_PATH for this machine."
    )

# Working directory at the time this cell runs.
current_wd = os.getcwd()

In [2]:
import pandas as pd
import torch
import numpy as np
from tqdm import tqdm

### Load in Metadata and Latents

In [None]:
# Load the saved latents dictionary (its keys are matched against SKUs further down).
# map_location="cpu" keeps the tensors on the CPU so the later `.numpy()`
# conversion also works on machines without the device they were saved from.
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
latents = torch.load(
    f"{DATA_PATH}/Models/e4e/00005_snapshot_1200/inversions/latents_dict.pt",
    map_location="cpu",
)

# Read the dress metadata; after transposing, the index is named 'sku' and
# turned into a regular column.
meta = (
    pd.read_json(f"{DATA_PATH}/Zalando_Germany_Dataset/dresses/metadata/dresses_metadata.json")
    .T
    .rename_axis('sku')
    .reset_index()
)
# All metadata columns are kept here on purpose: `attribute` is only assigned
# in the data-prep cell below, so selecting `meta[['sku', attribute]]` at this
# point raised a NameError on a fresh kernel. Later cells access `meta.sku` and
# `meta[attribute]` by name, so the extra columns do not affect them.

### Single Attribute Data Prep

In [4]:
# Metadata column that is one-hot encoded as the target further down.
attribute = 'category'

# Keep only the columns the remaining cells use. Done here (after `attribute`
# is defined) so the selection never depends on a not-yet-assigned name.
meta = meta[['sku', attribute]]

has_value = meta[attribute].notna()
if not has_value.all():
    # Remove observations with a missing attribute value. drop=True prevents
    # the old row labels from being added to `meta` as an extra 'index' column.
    meta = meta[has_value].reset_index(drop=True)
    # Subset latents to the cleaned metadata; a set gives O(1) membership
    # tests instead of rebuilding and scanning a list for every key.
    valid_skus = set(meta['sku'])
    latents = {sku: latent for sku, latent in latents.items() if sku in valid_skus}

assert len(latents) == len(meta), "Mismatch between latents shape and metadata shape"

In [5]:
n_obs = len(meta)

# One-hot encode the attribute; the column order defines the class order of
# the target vectors and is kept in `attribute_order`.
one_hot = pd.get_dummies(meta[attribute])
attribute_order = one_hot.columns.tolist()
dummies = one_hot.to_numpy().reshape(n_obs, -1, 1)

# Targets: one (n_classes, 1) column vector per observation, stored as float64.
targets = np.zeros((n_obs, meta[attribute].nunique(), 1))
targets[...] = dummies

# Latents: fill the pre-allocated array row by row, in metadata order, so that
# row i of `latents_out` corresponds to row i of `targets`.
latents_out = np.zeros((n_obs, 1, 16, 512))
for i, sku in enumerate(tqdm(meta['sku'])):
    latents_out[i] = latents[sku].numpy()

print(f"Latents out shape: {latents_out.shape}")
print(f"Target shape: {targets.shape}")

100%|██████████| 14060/14060 [00:01<00:00, 13663.82it/s]

Latents out shape: (14060, 1, 16, 512)
Target shape: (14060, 8, 1)





In [6]:
# Write the prepared arrays to a per-attribute input folder.
# NOTE(review): `attribute_order` (the column order of the one-hot targets) is
# not written by this cell -- confirm it is persisted elsewhere if needed.
base_save_dir = f"{DATA_PATH}/Models/StyleFlow/Inputs/"
attribute_dir = f"{base_save_dir}{attribute}"
os.makedirs(attribute_dir, exist_ok=True)

outputs = {
    f"{attribute_dir}/latents.npy": latents_out,
    f"{attribute_dir}/targets.npy": targets,
}
for out_path, array in outputs.items():
    np.save(out_path, array)