In [2]:
import warnings
warnings.filterwarnings("ignore")

import os
import random
import time
import json
import zipfile
import gc
import glob

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import cv2

import numpy as np
import pandas as pd
from tqdm import tqdm


# Transform을 위한 라이브러리
from torchvision import transforms
from torchvision import models
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Model을 위한 라이브러리
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.encoders import get_preprocessing_fn

# 이미지 시각화를 위한 라이브러리
from PIL import Image
import webcolors
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import seaborn as sns
# Apply seaborn's default plotting theme to matplotlib.
sns.set()

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Turn axes grid lines off globally.
# NOTE(review): this is set after sns.set(), presumably because the seaborn
# theme would otherwise re-enable the grid — confirm before reordering.
plt.rcParams["axes.grid"] = False

# Report the PyTorch build and whether a CUDA device is usable.
print("Pytorch version: {}".format(torch.__version__))
print("GPU: {}".format(torch.cuda.is_available()))

# Only query the device name when CUDA is available: get_device_name(0)
# raises on a CPU-only machine, which would abort this cell before `device`
# (which has an explicit CPU fallback) is ever assigned.
if torch.cuda.is_available():
    print("Device name: ", torch.cuda.get_device_name(0))
print("Device count: ", torch.cuda.device_count())

# Store the device string according to GPU availability.
device = "cuda" if torch.cuda.is_available() else "cpu"

Pytorch version: 1.10.0+cu102
GPU: True
Device name:  Tesla V100-PCIE-32GB
Device count:  1


In [34]:
# Root directory of the dataset. Defaults to the original location; set the
# ART_DATA_DIR environment variable to point at another copy of the data
# without editing the notebook.
path = os.environ.get("ART_DATA_DIR", '/opt/ml/Workspace/Art_classification/data')

# Split directories located directly under the dataset root.
train_path = os.path.join(path,"train")
test_path = os.path.join(path,"test")

In [35]:
# Map each class folder name to its integer class index (0..6, in this order).
label = {
    class_name: class_idx
    for class_idx, class_name in enumerate(
        ["dog", "elephant", "giraffe", "guitar", "horse", "house", "person"]
    )
}

# Placeholder entry for the test label: folder name "0" maps to -1.
label["0"] = -1

In [60]:
def data_frame(data_path, label_map=None):
    """Build a table of image paths and class indices from a folder tree.

    Every directory below ``data_path`` is treated as a class folder: its
    name is looked up in ``label_map`` and each file inside it becomes one
    row. Files located directly in ``data_path`` are ignored.

    Args:
        data_path: Root directory whose sub-directories are named after classes.
        label_map: Mapping from folder name to integer label. When omitted,
            the notebook-level ``label`` dict is used.

    Returns:
        pandas.DataFrame with columns ``img_path`` and ``label``, sorted by
        ``label`` then ``img_path``, with a fresh 0..n-1 index. An empty tree
        yields an empty frame that still has both columns.

    Raises:
        KeyError: If a sub-directory name is not present in ``label_map``.
    """
    if label_map is None:
        label_map = label  # notebook-level class-name -> index mapping

    # Identify the root by its path rather than by the literal names
    # "train"/"test", so any root name (and a trailing slash) works.
    root_dir = os.path.normpath(data_path)

    records = []
    for dir_path, _, file_names in os.walk(data_path):
        if os.path.normpath(dir_path) == root_dir:
            continue

        # basename of the normalized path is robust to trailing separators
        # and to the platform's path separator.
        class_name = os.path.basename(os.path.normpath(dir_path))
        if class_name not in label_map:
            raise KeyError(
                "no label defined for folder {!r} ({})".format(class_name, dir_path)
            )
        class_idx = label_map[class_name]

        for file_name in file_names:
            records.append(
                {"img_path": os.path.join(dir_path, file_name), "label": class_idx}
            )

    # Naming the columns explicitly keeps sort_values from raising KeyError
    # when no images were found.
    table = pd.DataFrame(records, columns=["img_path", "label"])
    table = table.sort_values(["label", "img_path"])
    return table.reset_index(drop=True)

In [61]:
# Index both splits (train first, then test) and display the training table.
train_df, test_df = (data_frame(split_dir) for split_dir in (train_path, test_path))
train_df

Unnamed: 0,img_path,label
0,/opt/ml/Workspace/Art_classification/data/trai...,0
1,/opt/ml/Workspace/Art_classification/data/trai...,0
2,/opt/ml/Workspace/Art_classification/data/trai...,0
3,/opt/ml/Workspace/Art_classification/data/trai...,0
4,/opt/ml/Workspace/Art_classification/data/trai...,0
...,...,...
1693,/opt/ml/Workspace/Art_classification/data/trai...,6
1694,/opt/ml/Workspace/Art_classification/data/trai...,6
1695,/opt/ml/Workspace/Art_classification/data/trai...,6
1696,/opt/ml/Workspace/Art_classification/data/trai...,6


In [85]:
def get_img_stats(data_frame):
    """Collect per-image size and pixel statistics for every listed image.

    Args:
        data_frame: pandas DataFrame with an ``img_path`` column holding image
            file paths. The index may be arbitrary; rows are visited in order.

    Returns:
        dict with four parallel lists, one entry per row:
        ``heights`` and ``widths`` (first two axes of the loaded array) and
        ``means`` and ``stds`` (mean / standard deviation over those two
        axes, i.e. one value per channel for a 3-D image). Values are on
        whatever scale ``plt.imread`` returns for the file.
    """
    img_info = dict(heights=[], widths=[], means=[], stds=[])
    # Iterate over the column values instead of data_frame["img_path"][i]:
    # integer [] on a Series is a label lookup, which fails (or picks the
    # wrong row) when the frame's index is not exactly 0..n-1.
    for img_path in data_frame["img_path"]:
        image = plt.imread(img_path)
        # Take only the first two axes so that 2-D (grayscale) arrays do not
        # break the unpacking.
        h, w = image.shape[:2]
        img_info['heights'].append(h)
        img_info['widths'].append(w)
        img_info['means'].append(image.mean(axis=(0,1)))
        img_info['stds'].append(image.std(axis=(0,1)))
    return img_info

In [93]:
# Gather size / pixel statistics for both splits (train first, then test).
train_img_info, test_img_info = (
    get_img_stats(split_df) for split_df in (train_df, test_df)
)

In [92]:
# Summarise the training split: mean height and width truncated to int, then
# the per-image channel means / stds averaged across all images.
print(f'image size: ({int(np.mean(train_img_info["heights"]))} x {int(np.mean(train_img_info["widths"]))})')
# NOTE(review): dividing by 255 presumes the loaded pixel values are on a
# 0-255 scale — confirm for every image format in the dataset.
print(f'RGB Mean: {np.mean(train_img_info["means"], axis=0) / 255.}')
# This is the average of per-image standard deviations, not a standard
# deviation computed over all pixels of the dataset at once.
print(f'RGB Standard Deviation: {np.mean(train_img_info["stds"], axis=0) / 255.}')

image size: (227 x 227)
RGB Mean: [0.5556861  0.50740065 0.45690217]
RGB Standard Deviation: [0.22876642 0.21754766 0.22090458]
