In [2]:
from utils import resize, \
                  process_name, \
                  get_info, \
                  fix_orientation
from shoeplotlib import plot_RBG_dist
from PIL import Image, ImageOps
import cv2
from scipy import ndimage

import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import entropy
import seaborn as sns
import math

import os
import shutil
import glob
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

## Pre-processing

In [3]:
# Directory layout: the project root is the parent of the notebook's working
# directory, and all image folders live under <project>/data.
curr_path = os.getcwd()
project_path = os.path.join(curr_path, '..')
data_path = os.path.join(project_path, 'data')
non_hype_path = os.path.join(data_path, 'non-hype')
non_hype_raw_path = os.path.join(data_path, 'non-hype-raw')
# sub-folders of data_path that the later cells iterate over
img_folders = ['stockx', 'goat', 'flight_club', 'non-hype']

In [3]:
# Start from an empty output folder for the rotation-corrected non-hype
# images: remove whatever is already at that path, then create it again.
stale_output_exists = os.path.exists(non_hype_path)
if stale_output_exists:
    shutil.rmtree(non_hype_path)
os.makedirs(non_hype_path)

In [4]:
# Fix orientations: pass every raw non-hype image through fix_orientation and
# write the result into non_hype_path under the file name it returns.
# NOTE: glob is called without recursive=True, so '**' matches like '*' and
# only the entries directly inside non_hype_raw_path are visited.
failed_writes = []
for file in tqdm(glob.glob(os.path.join(non_hype_raw_path, '**'))):
    img, file_name = fix_orientation(file)
    # cv2.imwrite can signal failure by returning False instead of raising,
    # so the result is checked; otherwise missing outputs go unnoticed.
    if not cv2.imwrite(os.path.join(non_hype_path, file_name), img):
        failed_writes.append(file)

# Report (rather than raise) so one bad file does not abort a long run.
if failed_writes:
    print(f'{len(failed_writes)} image(s) could not be written to '
          f'{non_hype_path}; first few: {failed_writes[:5]}')

100%|██████████| 4293/4293 [04:58<00:00, 14.40it/s]


In [18]:
# One "<name>_resized" output folder per image source, recreated empty:
# an existing folder is deleted before it is made again.
processed_paths = [path + '_resized' for path in img_folders]
for folder in processed_paths:
    folder_path = os.path.join(data_path, folder)
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)
    os.makedirs(folder_path)

In [3]:
# Record raw image specifications: one get_info() row per image, collected
# separately for the 'non-hype' folder and for all other (hyped) folders.
hyped_info, non_hyped_info = [], []
target_size = (224,224)  # only used by the disabled resize step below

# Gather-info loop. The resize/save step is currently commented out, so this
# cell only reads the source images and does not write any files.
for folder in img_folders:
    # List the folder once per folder instead of once per file (the listing
    # used to be re-run inside the inner loop just to compute num_files).
    # NOTE: without recursive=True, '**' matches like '*'.
    files = glob.glob(os.path.join(data_path, folder, '**'))
    num_files = len(files)

    # rows from 'non-hype' go to one list, everything else to the other
    info_rows = non_hyped_info if folder == 'non-hype' else hyped_info

    for file in tqdm(files):
        # new file name for the image and where its resized copy would go
        processed_name = process_name(file, folder)
        save_path = os.path.join(data_path, f'{folder}_resized', processed_name)

        # Open via a context manager so the underlying file is closed after
        # each image instead of staying open until garbage collection.
        # NOTE(review): assumes get_info only reads from the image and does
        # not keep a reference to it -- confirm in utils.
        with Image.open(file) as source_img:
            # grab raw image info and shoe info
            info_rows.append(get_info(source_img,
                                      processed_name,
                                      folder,
                                      save_path))

            # preprocess image
#             resized_img = resize(source_img.copy(), target_size)

            # save to new path
#             resized_img.save(save_path, format="JPEG")

100%|██████████| 866/866 [00:16<00:00, 52.97it/s]
100%|██████████| 972/972 [00:17<00:00, 56.36it/s]
100%|██████████| 1075/1075 [00:24<00:00, 44.61it/s]
100%|██████████| 3845/3845 [03:38<00:00, 17.57it/s]


In [4]:
# Metadata tables: turn the collected info rows into DataFrames. Hyped rows
# have an extra 'brand' column that the non-hyped rows do not.
non_hyped_cols = ['name', 'width', 'height', 'source', 'path']
hyped_cols = ['name', 'brand', 'width', 'height', 'source', 'path']
non_hyped_info_df = pd.DataFrame(data=non_hyped_info, columns=non_hyped_cols)
hyped_info_df = pd.DataFrame(data=hyped_info, columns=hyped_cols)

In [5]:
# Optional export of the two metadata tables to CSV files in the current
# working directory. Both calls are left disabled, so running this cell
# writes nothing; uncomment them to persist the tables.
# non_hyped_info_df.to_csv('non_hyped_info_df.csv',index=None)
# hyped_info_df.to_csv('hyped_info_df.csv',index=None)