# Import & Mount

In [None]:
pip install split-folders

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.layers import Dense, Flatten, MaxPooling2D
from tensorflow.keras import Input
from tensorflow.keras.layers import Dropout, BatchNormalization
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [None]:
import splitfolders
import os   
import cv2 
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.cm as cm
import tensorflow as tf
import pickle
from PIL import Image
import pandas as pd
from tqdm import tqdm
from keras.preprocessing import image

In [None]:
# Mount Google Drive; the project files read by this notebook live under
# /content/drive/MyDrive.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


# 데이터 준비 

In [None]:
# Column names handed to pd.read_csv for the metadata table, in file order.
col = [
    "Title",
    "Style",
    "Gender",
    "Season",
]

In [None]:
# Load the image metadata table. Because `names` is given, pandas treats the
# first row of the file as data and labels the columns with `col`.
metadata_csv = "/content/drive/MyDrive/이미지인식 프로젝트/이미지마이닝_통합.csv"
df = pd.read_csv(metadata_csv, names=col)

In [None]:
# Gender - man: copy every image whose metadata row has Gender == 0 into images/man/.

title_man = df.loc[df['Gender'] == 0, 'Title'].tolist()

# Create the target folder up front; otherwise a missing directory would turn
# every save into an ignored error and nothing would be copied.
os.makedirs('images/man', exist_ok=True)

skipped_man = []
for name in tqdm(title_man):
    try:
        # The context manager closes the source file handle once the copy is written.
        with Image.open("/content/drive/MyDrive/이미지인식 프로젝트/최종/{}.jpg".format(name)) as im:
            im.save('images/man/{}.jpg'.format(name))
    except (OSError, ValueError) as err:
        # Best effort: a missing/unreadable source file or an image that cannot
        # be written as JPEG is recorded and skipped instead of aborting the run.
        skipped_man.append((name, err))

print('skipped {} of {} images'.format(len(skipped_man), len(title_man)))



100%|██████████| 3221/3221 [07:28<00:00,  7.18it/s]


In [None]:
# Gender - woman: copy every image whose metadata row has Gender == 1 into images/woman/.

title_woman = df.loc[df['Gender'] == 1, 'Title'].tolist()

# Create the target folder up front; otherwise a missing directory would turn
# every save into an ignored error and nothing would be copied.
os.makedirs('images/woman', exist_ok=True)

skipped_woman = []
for name in tqdm(title_woman):
    try:
        # The context manager closes the source file handle once the copy is written.
        with Image.open("/content/drive/MyDrive/이미지인식 프로젝트/최종/{}.jpg".format(name)) as im:
            im.save('images/woman/{}.jpg'.format(name))
    except (OSError, ValueError) as err:
        # Best effort: a missing/unreadable source file or an image that cannot
        # be written as JPEG is recorded and skipped instead of aborting the run.
        skipped_woman.append((name, err))

print('skipped {} of {} images'.format(len(skipped_woman), len(title_woman)))



100%|██████████| 1892/1892 [04:13<00:00,  7.47it/s]


# Preprocessing

In [None]:
# Root folder holding one sub-folder of images per class.
folder_path = '/content/images'

# Class names are the sub-folder names. Hidden entries such as the
# '.ipynb_checkpoints' folder Jupyter may create are filtered out; unlike
# list.remove(), this does not raise when no such entry exists.
label_names = [name for name in os.listdir(folder_path) if not name.startswith('.')]
label_names

['man', 'woman']

In [None]:
# Build the path of every image: class folder name -> list of image file paths.

dataset = {}

for label in os.listdir(folder_path):
    sub_path = folder_path + '/' + label + '/'
    # Only real class folders count; hidden entries such as '.ipynb_checkpoints'
    # and stray files must not become labels.
    if label.startswith('.') or not os.path.isdir(sub_path):
        continue
    dataset[label] = [sub_path + filename for filename in os.listdir(sub_path)]

# Show a per-class file count rather than dumping every path into the output.
{label: len(paths) for label, paths in dataset.items()}

In [None]:
!mkdir resized

In [None]:
# 이미지 resize 후 resized폴더에 저장 

!mkdir resized/man
!mkdir resized/woman

In [None]:
# Sanity check of the index: number of image paths per label
# (the full path lists are too long to display usefully).
{label: len(paths) for label, paths in dataset.items()}

In [None]:
# Letterbox every indexed image onto a 224x224 canvas: scale the longer side
# to 224 px (aspect ratio kept), centre the result and fill the rest with the
# default zero border of cv2.warpAffine.
TARGET_SIZE = 224

skipped_resize = []
for label, filenames in dataset.items():
    out_dir = '/content/resized/{0}'.format(label)
    if not os.path.isdir(out_dir):
        # No output folder for this label (e.g. a hidden '.ipynb_checkpoints'
        # entry returned by os.listdir), so nothing could be written for it.
        continue

    for filename in filenames:
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports an unreadable / non-image file by returning None.
            skipped_resize.append(filename)
            continue

        # Scale factor that maps the longer side onto TARGET_SIZE.
        percent = TARGET_SIZE / max(img.shape[0], img.shape[1])

        try:
            img = cv2.resize(img, dsize=(0, 0), fx=percent, fy=percent,
                             interpolation=cv2.INTER_LINEAR)
            h, w = img.shape[:2]

            # Offsets that centre the scaled image on the canvas. Each axis is
            # clamped independently; the previous `elif` skipped the vertical
            # clamp whenever the horizontal one had fired.
            w_x = max((TARGET_SIZE - w) / 2, 0)
            h_y = max((TARGET_SIZE - h) / 2, 0)

            M = np.float32([[1, 0, w_x], [0, 1, h_y]])  # 2x3 translation matrix
            img_re = cv2.warpAffine(img, M, (TARGET_SIZE, TARGET_SIZE))  # shift + padding

            written = cv2.imwrite('{0}/{1}'.format(out_dir, filename.split("/")[-1]), img_re)
        except cv2.error:
            # OpenCV rejected the image (for example a side that scales to 0 px).
            written = False

        if not written:
            skipped_resize.append(filename)

print('skipped {} unreadable or unwritable files'.format(len(skipped_resize)))

# train, validation, test set 분할

In [None]:
# Copy resized/<class>/ into dataset/{train,val,test}/<class>/ with a
# 60 / 20 / 20 ratio; the fixed seed keeps the assignment repeatable.
split_ratio = (0.6, 0.2, 0.2)
splitfolders.ratio(
    'resized',
    output='dataset',
    seed=77,
    ratio=split_ratio,
)

Copying files: 7827 files [00:01, 6753.82 files/s]


In [None]:
# Index the training split: class folder name -> list of image file paths.
folder_path = '/content/dataset/train'

dataset = {}
for label in os.listdir(folder_path):
    class_dir = folder_path + '/' + label + '/'
    dataset[label] = [class_dir + filename for filename in os.listdir(class_dir)]

dataset

In [None]:
# Integer class id assigned to each gender folder name.
label_gender = dict(man=0, woman=1)

In [None]:
# Read every indexed training image into memory together with its class id.
x_train, y_train = [], []
unreadable_train = []

for label, filenames in dataset.items():
    for filename in filenames:
        # Named `img`, not `image`, so the `image` module imported from
        # keras.preprocessing at the top of the notebook is not overwritten.
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread returns None for files it cannot decode; a None entry
            # would break the later np.array(...).astype('float32') conversion.
            unreadable_train.append(filename)
            continue

        x_train.append(img)
        y_train.append(label_gender[label])

if unreadable_train:
    print('skipped {} unreadable files'.format(len(unreadable_train)))

In [None]:
# Stack the per-image arrays into one batch array and the labels into a vector.
x_train = np.array(x_train)
y_train = np.array(y_train)

In [None]:
# Convert the pixel data to 32-bit floats.
x_train = x_train.astype(np.float32)

In [None]:
# Display the shapes of the image array and the label vector (before augmentation).
x_train.shape, y_train.shape

((4695, 224, 224, 3), (4695,))

# train set 증강 및 형변환

In [None]:
# Random augmentation settings for the training images; see the Keras
# ImageDataGenerator documentation for the exact meaning of each range.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,    # shear distortion
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Confirm which folder the augmentation loop below will read from and write to.
folder_path

'/content/dataset/train'

In [None]:
# Write AUG_PER_IMAGE randomly transformed copies of every original training
# image into the same class folder.
#
# Files are saved under deterministic names (aug_<source stem>_<k>.jpg) instead
# of datagen.flow(save_to_dir=...). With save_to_dir the file name ends in a
# random number, so with one shared prefix per class later files can overwrite
# earlier ones: the recorded run ended with 13450 training images rather than
# the expected 4695 * 4 = 18780. The fixed names also make the cell re-runnable:
# augmented files are never augmented again and existing outputs are kept.
#
# Lower AUG_PER_IMAGE if the enlarged training set does not fit in memory.
AUG_PER_IMAGE = 3
AUG_PREFIX = 'aug_'

for label in os.listdir(folder_path):
    label_path = folder_path + '/' + label + '/'
    for filename in os.listdir(label_path):
        if filename.startswith(AUG_PREFIX):
            # Output of an earlier run of this cell; not a source image.
            continue

        filepath = label_path + filename
        stem = os.path.splitext(filename)[0]

        x = img_to_array(load_img(filepath))

        for k in range(AUG_PER_IMAGE):
            out_path = '{}{}{}_{}.jpg'.format(label_path, AUG_PREFIX, stem, k)
            if os.path.exists(out_path):
                continue
            # One random draw from the transformations configured on `datagen`.
            augmented = datagen.random_transform(x)
            array_to_img(augmented).save(out_path)

In [None]:
# Re-index the training split so the augmented files are included:
# class folder name -> list of image file paths.
folder_path = '/content/dataset/train'

dataset = {
    label: [
        folder_path + '/' + label + '/' + filename
        for filename in os.listdir(folder_path + '/' + label + '/')
    ]
    for label in os.listdir(folder_path)
}

dataset

In [None]:
# Integer class id assigned to each gender folder name.
label_gender = dict(man=0, woman=1)

In [None]:
# Read every indexed training image (originals plus augmented copies) into
# memory together with its class id.
x_train, y_train = [], []
unreadable_train = []

for label, filenames in dataset.items():
    for filename in filenames:
        # Named `img`, not `image`, so the `image` module imported from
        # keras.preprocessing at the top of the notebook is not overwritten.
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread returns None for files it cannot decode; a None entry
            # would break the later np.array(...).astype('float32') conversion.
            unreadable_train.append(filename)
            continue

        x_train.append(img)
        y_train.append(label_gender[label])

if unreadable_train:
    print('skipped {} unreadable files'.format(len(unreadable_train)))

In [None]:
# Stack the per-image arrays into one batch array and the labels into a vector.
x_train = np.array(x_train)
y_train = np.array(y_train)

In [None]:
# Convert the pixel data to 32-bit floats.
x_train = x_train.astype(np.float32)

In [None]:
x_train.shape, y_train.shape # the number of images has increased

((13450, 224, 224, 3), (13450,))

# validation, test set 형변환

In [None]:
# Index the validation split: class folder name -> list of image file paths.
folder_path = '/content/dataset/val'

dataset = {}
for label in os.listdir(folder_path):
    class_dir = folder_path + '/' + label + '/'
    dataset[label] = [class_dir + filename for filename in os.listdir(class_dir)]

dataset

In [None]:
# Read every indexed validation image into memory together with its class id.
x_val, y_val = [], []
unreadable_val = []

for label, filenames in dataset.items():
    for filename in filenames:
        # Decode the file into an array. Named `img`, not `image`, so the
        # `image` module imported from keras.preprocessing is not overwritten.
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread returns None for files it cannot decode; a None entry
            # would break the later np.array(...).astype('float32') conversion.
            unreadable_val.append(filename)
            continue

        x_val.append(img)
        y_val.append(label_gender[label])  # folder name -> integer class id

if unreadable_val:
    print('skipped {} unreadable files'.format(len(unreadable_val)))

In [None]:
# Stack the per-image arrays into one batch array and the labels into a vector.
x_val = np.array(x_val)
y_val = np.array(y_val)

In [None]:
# Convert the pixel data to 32-bit floats.
x_val = x_val.astype(np.float32)

In [None]:
# Display the shapes of the validation image array and label vector.
x_val.shape, y_val.shape

((1564, 224, 224, 3), (1564,))

In [None]:
# Index the test split: class folder name -> list of image file paths.
folder_path = '/content/dataset/test'

dataset = {
    label: [
        folder_path + '/' + label + '/' + filename
        for filename in os.listdir(folder_path + '/' + label + '/')
    ]
    for label in os.listdir(folder_path)
}

dataset

In [None]:
# Read every indexed test image into memory together with its class id.
x_test, y_test = [], []
unreadable_test = []

for label, filenames in dataset.items():
    for filename in filenames:
        # Decode the file into an array. Named `img`, not `image`, so the
        # `image` module imported from keras.preprocessing is not overwritten.
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread returns None for files it cannot decode; a None entry
            # would break the later np.array(...).astype('float32') conversion.
            unreadable_test.append(filename)
            continue

        x_test.append(img)
        y_test.append(label_gender[label])  # folder name -> integer class id

if unreadable_test:
    print('skipped {} unreadable files'.format(len(unreadable_test)))

In [None]:
# Stack the per-image arrays into one batch array and the labels into a vector.
x_test = np.array(x_test)
y_test = np.array(y_test)

In [None]:
# Convert the pixel data to 32-bit floats.
x_test = x_test.astype(np.float32)

In [None]:
# Display the shapes of the test image array and label vector.
x_test.shape, y_test.shape

((1568, 224, 224, 3), (1568,))

# Zero-centering

In [None]:
def zero_mean(image):
    """Return the element-wise mean of ``image`` taken over its first axis.

    Despite the name, nothing is subtracted here: the result is the mean
    array that the caller subtracts afterwards to zero-centre the data.
    """
    batch = np.asanyarray(image)
    return batch.mean(axis=0)

In [None]:
# Mean image of the training set; the cells below subtract it from every split.
zero_mean_img = zero_mean(x_train)

In [None]:
# Display the shape of the mean image.
zero_mean_img.shape

(224, 224, 3)

In [None]:
# Zero-centre the training images in place by subtracting the training mean image.
# NOTE: running this cell twice subtracts the mean twice.
np.subtract(x_train, zero_mean_img, out=x_train)

In [None]:
# Shift validation and test images in place by the same training mean image.
# NOTE: running this cell twice subtracts the mean twice.
np.subtract(x_val, zero_mean_img, out=x_val)
np.subtract(x_test, zero_mean_img, out=x_test)

# Transfer Learning

In [None]:
## Partially fine-tuned model
base_model = ResNet50(include_top=False, input_shape=(224, 224, 3), weights='imagenet')

# Freeze every backbone layer except the last 10, which stay trainable.
for layer in base_model.layers[:-10]:
    layer.trainable = False

inputs = tf.keras.Input(shape=(224, 224, 3))

# training=False runs the backbone in inference mode, so the batch-norm
# statistics are not updated during fine-tuning.
x = base_model(inputs, training=False)

x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dropout(0.5)(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
# Binary target (man=0 / woman=1): a single sigmoid unit outputs P(label == 1).
# Softmax over one unit is constantly 1.0, so the previous head could not learn.
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs, outputs)

# binary_crossentropy matches the single-probability output and the 0/1 labels.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


# train loss, validation loss 시각화 

In [None]:
def tr_plot(tr_data, start_epoch):
    """Plot training vs. validation loss and accuracy side by side.

    Args:
        tr_data: object whose ``history`` dict holds the per-epoch lists
            'accuracy', 'loss', 'val_accuracy' and 'val_loss'.
        start_epoch: number of epochs completed before this run; it offsets
            the epoch numbers shown on the x axis and in the legends.

    The epoch with the lowest validation loss and the epoch with the highest
    validation accuracy are each marked with a blue dot. Returns nothing; the
    figure is shown with ``plt.show()``.
    """
    tacc = tr_data.history['accuracy']
    tloss = tr_data.history['loss']
    vacc = tr_data.history['val_accuracy']
    vloss = tr_data.history['val_loss']

    # 1-based epoch numbers, continuing after `start_epoch`.
    epochs = list(range(start_epoch + 1, start_epoch + len(tacc) + 1))

    index_loss = np.argmin(vloss)  # epoch index with the lowest validation loss
    val_lowest = vloss[index_loss]
    index_acc = np.argmax(vacc)    # epoch index with the highest validation accuracy
    acc_highest = vacc[index_acc]

    best_loss_epoch = index_loss + 1 + start_epoch
    best_acc_epoch = index_acc + 1 + start_epoch
    sc_label = 'best epoch= ' + str(best_loss_epoch)
    vc_label = 'best epoch= ' + str(best_acc_epoch)

    plt.style.use('fivethirtyeight')
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 8))

    axes[0].plot(epochs, tloss, 'r', label='Training loss')
    axes[0].plot(epochs, vloss, 'g', label='Validation loss')
    axes[0].scatter(best_loss_epoch, val_lowest, s=150, c='blue', label=sc_label)
    axes[0].set_title('Training and Validation Loss')
    axes[0].set_xlabel('Epochs')
    axes[0].set_ylabel('Loss')
    axes[0].legend()

    axes[1].plot(epochs, tacc, 'r', label='Training Accuracy')
    axes[1].plot(epochs, vacc, 'g', label='Validation Accuracy')
    axes[1].scatter(best_acc_epoch, acc_highest, s=150, c='blue', label=vc_label)
    axes[1].set_title('Training and Validation Accuracy')
    axes[1].set_xlabel('Epochs')
    axes[1].set_ylabel('Accuracy')
    axes[1].legend()

    # Actually apply the layout; the bare attribute `plt.tight_layout` was a no-op.
    fig.tight_layout()
    plt.show()

# 모델 학습 및 평가 

In [None]:
# Train for 5 epochs with validation after each epoch, then plot the
# learning curves (no epochs completed beforehand, hence the offset 0).
history = model.fit(
    x_train,
    y_train,
    epochs=5,
    batch_size=125,
    validation_data=(x_val, y_val),
)
tr_plot(history, 0)

In [None]:
# Evaluate on the held-out test split; displays the loss and the accuracy
# metric the model was compiled with.
model.evaluate(x=x_test, y=y_test)