In [13]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

loading dataset

In [14]:
train_dir = '/content/drive/MyDrive/AI_05/Section4/project_re/train'


In [3]:
train_size = len(os.listdir(train_dir))
train_size

10275

In [15]:
df = pd.read_csv("/content/drive/MyDrive/AI_05/Section4/project_re/labels.csv")
df.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [16]:
files = len(os.listdir('/content/drive/MyDrive/AI_05/Section4/project_re/train'))
files

10275

In [17]:
dog_breeds = sorted(df['breed'].unique())
n_classes = len(dog_breeds)
print(n_classes)


120


In [18]:
class_to_num = dict(zip(dog_breeds, range(n_classes)))

In [19]:
print(class_to_num)

{'affenpinscher': 0, 'afghan_hound': 1, 'african_hunting_dog': 2, 'airedale': 3, 'american_staffordshire_terrier': 4, 'appenzeller': 5, 'australian_terrier': 6, 'basenji': 7, 'basset': 8, 'beagle': 9, 'bedlington_terrier': 10, 'bernese_mountain_dog': 11, 'black-and-tan_coonhound': 12, 'blenheim_spaniel': 13, 'bloodhound': 14, 'bluetick': 15, 'border_collie': 16, 'border_terrier': 17, 'borzoi': 18, 'boston_bull': 19, 'bouvier_des_flandres': 20, 'boxer': 21, 'brabancon_griffon': 22, 'briard': 23, 'brittany_spaniel': 24, 'bull_mastiff': 25, 'cairn': 26, 'cardigan': 27, 'chesapeake_bay_retriever': 28, 'chihuahua': 29, 'chow': 30, 'clumber': 31, 'cocker_spaniel': 32, 'collie': 33, 'curly-coated_retriever': 34, 'dandie_dinmont': 35, 'dhole': 36, 'dingo': 37, 'doberman': 38, 'english_foxhound': 39, 'english_setter': 40, 'english_springer': 41, 'entlebucher': 42, 'eskimo_dog': 43, 'flat-coated_retriever': 44, 'french_bulldog': 45, 'german_shepherd': 46, 'german_short-haired_pointer': 47, 'gian

In [20]:
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.utils import to_categorical

def images_to_array(data_dir, df, image_size):
    image_names = df['id']
    image_labels = df['breed']
    data_size = len(image_names)
    
    #빈 데이터셋을 만들어 준다
    X = np.zeros([data_size,image_size[0],image_size[1],image_size[2]],dtype = np.uint8)
    y = np.zeros([data_size,1],dtype = np.uint8)

    for i in range(data_size):
        img_name = image_names[i]
        img_dir = os.path.join(data_dir, img_name+'.jpg')
        img_pixels = load_img(img_dir, target_size=image_size)
        X[i] = img_pixels
        y[i] = class_to_num[image_labels[i]]

    y = to_categorical(y)
    
    # 데이터들을 섞어주는 역할을 한다. 이렇게도 한다니 신기
    ind = np.random.permutation(data_size)
    X = X[ind]
    y = y[ind]

    print('Output Data Size:', X.shape)
    print('Output Label Size:', y.shape)

    return X, y


In [21]:
img_size = (224, 224, 3)
# selecting image size according to pretrained models.
# 만약 내가 새로 모델을 만든다면 내가 원하는 형태로 해도 문제 없을 듯.

In [22]:
train_input, train_target = images_to_array(train_dir, df, img_size)

Output Data Size: (10222, 224, 224, 3)
Output Label Size: (10222, 120)


## CNN 모델학습 (자체모델)


In [23]:
from sklearn.model_selection import train_test_split

train_input, val_input, train_target, val_target = train_test_split(train_input, train_target, test_size=0.2, random_state=50)

In [24]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [None]:
num_classes = 120

model = Sequential([
  layers.experimental.preprocessing.Rescaling(1./255, input_shape=img_size),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dropout(0.4),
  layers.Dense(num_classes, activation='softmax')
])

In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics='accuracy')

checkpoint_cb = keras.callbacks.ModelCheckpoint('best-cnn-model.h5', save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 224, 224, 3)       0         
                                                                 
 conv2d (Conv2D)             (None, 224, 224, 16)      448       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 112, 112, 16)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 112, 112, 32)      4640      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 56, 56, 32)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 56, 56, 64)        1

In [30]:
epochs= 30

In [None]:
history = model.fit(train_input, train_target, epochs=epochs, validation_data=(val_input, val_target),
                    callbacks=[checkpoint_cb, early_stopping_cb])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


In [None]:
model.save('/content/drive/MyDrive/AI_05/Section4/project_re/model_whole.h5')

## Resnet 이용해보기

In [25]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model


In [26]:
resnet = ResNet50(weights='imagenet', include_top=False)

for layer in resnet.layers:
    layer.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [34]:
x = resnet.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(120, activation='softmax')(x)
model = Model(resnet.input, predictions)

In [None]:
model.summary()

In [35]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics='accuracy')
checkpoint_cb = keras.callbacks.ModelCheckpoint('resnet-cnn-model.h5', save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)

history = model.fit(train_input, train_target, epochs=epochs, validation_data=(val_input, val_target),
                    callbacks=[checkpoint_cb, early_stopping_cb])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30


In [32]:
model.save('/content/drive/MyDrive/AI_05/Section4/project_re/resnet_model.h5')

In [None]:
names = list(os.listdir('/content/drive/MyDrive/AI_05/Section4/project_re/test'))

### 모델 테스트

In [None]:
def images_to_array2(test_data_dir, image_size):
    image_names = list(os.listdir('/content/drive/MyDrive/AI_05/Section4/project_re/test'))
    # image_labels = df['breed']
    data_size = len(image_names)
    
    #빈 데이터셋을 만들어 준다
    X = np.zeros([data_size,image_size[0],image_size[1],image_size[2]],dtype = np.uint8)

    for i in range(data_size):
        img_name = image_names[i]
        img_dir = os.path.join(test_data_dir, img_name)
        img_pixels = load_img(img_dir, target_size=image_size)
        X[i] = img_pixels


    print('Output Data Size:', X.shape)

    return X

In [None]:
test_data_dir = '/content/drive/MyDrive/AI_05/Section4/project_re/test'
test_input =images_to_array2(test_data_dir, img_size)

Output Data Size: (6, 224, 224, 3)


In [None]:
preds = model.predict(test_input)
print(preds)

[[1.00768764e-12 1.08890642e-14 9.56235785e-11 2.83447389e-06
  2.59623066e-05 3.29291595e-07 1.06447287e-15 6.30888641e-01
  4.21976204e-10 7.91081391e-07 8.25632358e-12 2.17453646e-07
  2.34679748e-10 2.84713252e-07 2.25192122e-11 2.20084999e-08
  6.97769065e-10 1.11379621e-10 9.40550047e-13 9.62011745e-06
  5.33068659e-11 2.10944876e-07 1.19474065e-03 2.15517578e-12
  2.26309282e-09 1.23486028e-14 1.78024430e-15 2.29294361e-08
  7.34263761e-09 2.91905078e-09 3.71951073e-14 3.18035149e-13
  5.09352456e-12 8.03668954e-08 6.27243882e-11 1.97211090e-19
  1.61804733e-13 3.67789005e-04 1.53856164e-11 1.64077662e-07
  8.47235349e-09 5.52603998e-15 2.48278949e-11 1.24335338e-06
  3.22866924e-08 2.15694494e-11 1.08587692e-07 3.71989518e-06
  2.03778903e-11 8.38571046e-10 1.84517412e-09 2.45675164e-05
  2.40568641e-11 1.15794059e-08 9.13453409e-14 5.02767591e-07
  1.20201673e-15 1.84763135e-10 6.99379226e-12 8.77882662e-12
  3.52138071e-03 4.58014273e-17 5.49219264e-12 8.29925592e-15
  3.4645

In [None]:
class_to_num

In [None]:
num_to_class = {v:k for k,v in class_to_num.items()}

In [None]:
ttt=list(np.argmax(preds, axis=-1))
ttt

[7, 7, 111, 7, 90, 110]

In [None]:
for i in ttt:
    breed = num_to_class.get(i)
    print(breed)

basenji
basenji
toy_terrier
basenji
rhodesian_ridgeback
toy_poodle
