# **실습 문제**
* 주어진 코드는 새의 품종을 분류하는 코드이다.
* 랜덤 서치를 이용하여 하이퍼 파라미터 튜닝 수행
* 하이퍼 파라미터의 개수는 최소 3개 이상 적용

In [2]:
import os
import random
import shutil
from sklearn.model_selection import train_test_split
import imgaug.augmenters as iaa
from PIL import Image

### **1. 데이터셋 경로 설정**

In [None]:
# 1. Dataset path configuration
# Source images live under `original_data_dir`; the split copies are written
# to the train/valid/test sub-directories of `output_base_dir`.
original_data_dir = "cifar10_images"
output_base_dir = "cifar10_dataset"
train_dir, valid_dir, test_dir = (
    os.path.join(output_base_dir, split_name)
    for split_name in ("train", "valid", "test")
)

### **2. 데이터셋 분할: 0.7 (Train), 0.2 (Validation), 0.1 (Test)**

In [None]:
def split_data(original_dir, train_ratio=0.7, valid_ratio=0.2, output_dirs=None, seed=None):
    """Copy every class folder's images into train/valid/test directories.

    Each sub-directory of ``original_dir`` is treated as one class. Its image
    files are shuffled and partitioned by ``train_ratio`` / ``valid_ratio``;
    whatever remains goes to the test split. Files are copied (not moved) into
    ``<split dir>/<class name>/``.

    Existing files in the output directories are not removed, so running the
    split twice with different shuffles mixes the splits together. Clear the
    output directories before re-running.

    Args:
        original_dir: Directory containing one sub-directory per class.
        train_ratio: Fraction of each class's images copied to the train split.
        valid_ratio: Fraction of each class's images copied to the valid split.
        output_dirs: Optional ``(train, valid, test)`` destination directories.
            Defaults to the module-level ``train_dir``, ``valid_dir`` and
            ``test_dir``.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    # Tolerance for binary floating-point error in the ratio arithmetic.
    eps = 1e-9
    if train_ratio < 0 or valid_ratio < 0 or train_ratio + valid_ratio > 1 + eps:
        raise ValueError(
            "train_ratio and valid_ratio must be non-negative and sum to at most 1"
        )

    if output_dirs is None:
        output_dirs = (train_dir, valid_dir, test_dir)
    train_out, valid_out, test_out = output_dirs

    # A private generator keeps a seeded run from disturbing global random state.
    shuffler = random.Random(seed) if seed is not None else random

    # os.listdir returns entries in arbitrary order; sort so a seeded shuffle
    # yields the same split on every machine.
    for class_name in sorted(os.listdir(original_dir)):
        class_dir = os.path.join(original_dir, class_name)
        if not os.path.isdir(class_dir):
            continue

        # Image paths for this class (extension match is case-insensitive).
        images = [
            os.path.join(class_dir, img)
            for img in sorted(os.listdir(class_dir))
            if img.lower().endswith(('.png', '.jpg', '.jpeg'))
        ]
        shuffler.shuffle(images)

        # 0.7 + 0.2 evaluates to 0.8999999999999999, so a bare int() truncation
        # can land one below the intended boundary (e.g. 26 instead of 27 for
        # 30 images). The epsilon restores the intended floor.
        train_split = int(len(images) * train_ratio + eps)
        valid_split = int(len(images) * (train_ratio + valid_ratio) + eps)

        partitions = (
            (train_out, images[:train_split]),
            (valid_out, images[train_split:valid_split]),
            (test_out, images[valid_split:]),
        )

        # Copy each partition into <split dir>/<class name>/.
        for output_dir, image_set in partitions:
            class_output_dir = os.path.join(output_dir, class_name)
            os.makedirs(class_output_dir, exist_ok=True)
            for image_path in image_set:
                shutil.copy(image_path, class_output_dir)

# Run the split on the original dataset with the default ratios
# (train_ratio=0.7, valid_ratio=0.2; the remainder becomes the test split).
split_data(original_data_dir)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten,Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input
from tensorflow.keras.preprocessing import image
import os

# Locations of the bird-species training and test images (one folder per class).
train_folder = 'Birds/train'
test_folder = 'Birds/test'

# Keep only a fraction of the classes to shrink the dataset
# (for speed and memory efficiency).
class_reduce = 0.1
# Number of classes actually used.
no_class = int(class_reduce * len(os.listdir(train_folder)))

def _load_split(folder, class_names):
    """Load and preprocess every image of the given classes under ``folder``.

    The label of an image is the index of its class in ``class_names``, so
    passing the same list for several folders keeps their labels aligned.

    Returns:
        A ``(samples, labels)`` pair of Python lists: preprocessed image
        arrays and their integer class indices.
    """
    samples, labels = [], []
    for label, class_name in enumerate(class_names):
        class_path = os.path.join(folder, class_name)
        for fname in os.listdir(class_path):
            img = image.load_img(os.path.join(class_path, fname), target_size=(224, 224))
            if len(img.getbands()) != 3:
                print("주의: 유효하지 않은 영상 발생", class_name, fname)
                continue
            x = image.img_to_array(img)
            x = preprocess_input(x)
            samples.append(x)
            labels.append(label)
    return samples, labels


# os.listdir returns entries in arbitrary order, so enumerating the train and
# test folders separately can give the same species different label indices.
# Fix one sorted class list (limited to the first `no_class` classes) and
# use it for both splits.
class_names = sorted(os.listdir(train_folder))[:no_class]

x_train, y_train = _load_split(train_folder, class_names)
x_test, y_test = _load_split(test_folder, class_names)

x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)

# One-hot encode the integer labels for categorical cross-entropy.
y_train = tf.keras.utils.to_categorical(y_train, no_class)
y_test = tf.keras.utils.to_categorical(y_test, no_class)

# ResNet50 with ImageNet weights and without its top classification layers.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# New classification head stacked on the base: flatten -> 1024 ReLU -> softmax.
cnn = Sequential([
    base_model,
    Flatten(),
    Dense(1024, activation='relu'),
    Dense(no_class, activation='softmax'),
])

cnn.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(0.00002),
    metrics=['accuracy'],
)

# NOTE(review): the test set is also passed as validation data here, so the
# per-epoch validation metrics and the final evaluation use the same samples.
hist = cnn.fit(
    x_train,
    y_train,
    batch_size=16,
    epochs=10,
    validation_data=(x_test, y_test),
    verbose=1,
)

# evaluate() returns [loss, accuracy]; report accuracy as a percentage.
res = cnn.evaluate(x_test, y_test, verbose=0)
print("정확률은", res[1] * 100)