In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import os
import random
from PIL import Image
import matplotlib.pyplot as plt
from tqdm.autonotebook import tqdm
import gc



def plot_random_images(dir_path, num_images, folder_name):
    """Plot a random sample of images from a directory in a grid of up to 4 columns.

    Parameters
    ----------
    dir_path : str
        Directory scanned (non-recursively) for ``.jpg`` / ``.png`` files.
        The extension match is case-sensitive.
    num_images : int
        Maximum number of images to display. When the directory holds fewer
        matching files, only the available ones are shown.
    folder_name : str
        Text used as the figure title.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``. When there is nothing to
        plot (no matching files, or ``num_images <= 0``) a short message is
        printed and no figure is created.
    """
    # Get a list of all the image file names in the directory
    image_files = [f for f in os.listdir(dir_path) if f.endswith(('.jpg', '.png'))]

    # Shuffle, then keep at most num_images entries
    random.shuffle(image_files)
    image_files = image_files[:max(num_images, 0)]

    # Size the grid from the number of images actually selected, so a
    # directory with fewer files than requested no longer raises IndexError
    # and an empty selection no longer divides by zero.
    num_shown = len(image_files)
    if num_shown == 0:
        print("Nothing to plot: no images selected from {}".format(dir_path))
        return

    num_rows = (num_shown + 3) // 4
    num_cols = min(num_shown, 4)

    # Create a figure to plot the images
    fig = plt.figure(figsize=(10, 10 * num_rows / num_cols))

    for position, file_name in enumerate(image_files, start=1):
        ax = fig.add_subplot(num_rows, num_cols, position)

        # Open inside a context manager so the file handle is released
        # as soon as the pixels have been handed to matplotlib.
        with Image.open(os.path.join(dir_path, file_name)) as img:
            ax.imshow(img, cmap="gray")

        # Remove the axis ticks and show the file name under the image
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xlabel(file_name, fontsize=8, wrap=True)

    # Set the title of the plot to the folder name
    plt.suptitle(folder_name, fontsize=16)

    # Adjust the spacing between the subplots
    plt.subplots_adjust(wspace=0.05, hspace=0.05)

    # Show the plot
    plt.show()

  from tqdm.autonotebook import tqdm


In [3]:
# Dataset locations, relative to the notebook's working directory.
# training_dir holds the labelled images, testing_dir the images to predict.
training_dir = "./train"
testing_dir  = "./test"

In [4]:
# Label table: one row per training image, with its `id` and `breed`.
labels = pd.read_csv('./labels.csv')

# Alphabetically sorted list of the distinct breed names.
classes = sorted(labels['breed'].unique())
n_classes = len(classes)
print('Total unique breed :----> {}'.format(n_classes), "\n")

# Breed name -> integer class id (its position in `classes`).
class_to_num = {breed: class_ix for class_ix, breed in enumerate(classes)}

Total unique breed :----> 120 



In [5]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os

In [6]:
input_shape = (331,331,3)

def images_to_array(directory, label_dataframe = labels, target_size = input_shape):
    """Load the labelled images into one uint8 array plus one-hot labels.

    Parameters
    ----------
    directory : str
        Folder containing ``<id>.jpg`` for every ``id`` in ``label_dataframe``.
    label_dataframe : pandas.DataFrame
        Frame with an ``id`` column (file name without extension) and a
        ``breed`` column (key into ``class_to_num``). Rows are read by
        position, so slices and filtered frames with a non-default index
        are handled correctly.
    target_size : tuple
        ``(height, width, channels)`` every image is resized to.

    Returns
    -------
    images : numpy.ndarray
        uint8 array of shape ``(n_rows, height, width, channels)``.
    y : numpy.ndarray
        Labels one-hot encoded by ``to_categorical`` with
        ``len(class_to_num)`` columns, so the width matches the classifier
        even when a subset of the rows does not contain every breed.

    Raises
    ------
    KeyError
        If a breed is missing from ``class_to_num``.
    """
    n_samples = len(label_dataframe)
    image_ids = label_dataframe['id'].values
    # Positional access: the original label-based `image_labels[ix]` only
    # worked when the frame's index happened to be 0..n-1.
    breeds = label_dataframe['breed'].values

    # uint8 pixels keep the full dataset within RAM.
    images = np.zeros([n_samples, target_size[0], target_size[1],
                       target_size[2]], dtype=np.uint8)
    # int32 class ids: uint8 would silently wrap for more than 256 classes.
    y = np.zeros([n_samples, 1], dtype=np.int32)

    for ix, image_name in enumerate(tqdm(image_ids)):
        img_path = os.path.join(directory, image_name + '.jpg')
        # load_img only needs (height, width); channels come from the file.
        img = load_img(img_path, target_size=target_size[:2])
        images[ix] = img
        del img
        y[ix] = class_to_num[breeds[ix]]

    y = to_categorical(y, num_classes=len(class_to_num))
    return images, y
        
        

In [7]:
import time 
# Time the full load of the training images into memory.
t = time.time()

X, y = images_to_array(directory='./train', label_dataframe=labels[:])

print('runtime in seconds: {}'.format(time.time() - t))

  0%|          | 0/10222 [00:00<?, ?it/s]

runtime in seconds: 109.219229221344


In [8]:
from tensorflow.keras.callbacks import ReduceLROnPlateau,EarlyStopping

## Learning Rate Annealer
# The classifier is compiled with metrics=['accuracy'], so the validation
# metric is logged as 'val_accuracy'. Monitoring 'val_acc' referenced a key
# that is never logged, which meant the learning rate was never reduced.
lrr = ReduceLROnPlateau(monitor = "val_accuracy", factor = .01, patience = 3,
                       min_lr= 1e-5, verbose = 1)
## Prepare Callbacks
# Stop after 10 epochs without val_loss improvement and roll back to the
# best weights seen.
EarlyStop = EarlyStopping(monitor='val_loss', patience = 10,
                          restore_best_weights = True)

In [9]:
from tensorflow.keras.optimizers import Adam

# Hyperparameters
batch_size= 128
epochs=50
learn_rate=.001

# epsilon must be a number: 1e-7 is the Keras default and is also what
# older Keras versions substituted when epsilon=None was passed.
adam=Adam(learning_rate=learn_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-7,  amsgrad=False)

In [10]:
from tensorflow.keras.layers import (Input, Lambda , Dense, Flatten, 
                                     ReLU, LeakyReLU, PReLU, BatchNormalization,
                                    Conv2D, MaxPool2D, Dropout, 
                                     GlobalAveragePooling2D)

from tensorflow.keras.models import (Model, Sequential)

In [11]:
## function to extract features from the dataset by a given pretrained model
img_size = (331,331,3)

def get_features(model_name, model_preprocessor, input_size, data):
    """Run `data` through a pretrained backbone and return pooled features.

    Parameters
    ----------
    model_name : callable
        Model constructor; it is called with ``weights='imagenet'``,
        ``include_top=False`` and ``input_shape=input_size``.
    model_preprocessor : callable
        Preprocessing function applied to the raw input inside the model
        through a ``Lambda`` layer.
    input_size : tuple
        Shape of a single input sample, passed to ``Input``.
    data : array-like
        Batch of inputs handed to ``Model.predict``.

    Returns
    -------
    numpy.ndarray
        Output of the ``GlobalAveragePooling2D`` layer for every sample.
    """
    input_layer = Input(input_size)
    preprocessor = Lambda(model_preprocessor)(input_layer)
    base_model   = model_name(weights='imagenet', include_top=False,
                            input_shape = input_size)(preprocessor)
    avg = GlobalAveragePooling2D()(base_model)
    feature_extractor = Model(inputs = input_layer, outputs = avg)

    #Extract feature.
    feature_maps = feature_extractor.predict(data, verbose=1)

    # Log the constructor's name instead of its repr
    # ("<function InceptionV3 at 0x...>").
    display_name = getattr(model_name, "__name__", str(model_name))
    print("{0} Feature Map Shape Are {1}".format(display_name, feature_maps.shape))
    return feature_maps

In [12]:
# Extract features using InceptionV3 

from keras.applications.inception_v3 import InceptionV3, preprocess_input
# Keep a dedicated alias: later cells re-import `preprocess_input` from other
# model modules, which rebinds that name.
inception_preprocessor = preprocess_input
inception_features = get_features(
    model_name=InceptionV3,
    model_preprocessor=inception_preprocessor,
    input_size=img_size,
    data=X,
)


[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1113s[0m 3s/step
<function InceptionV3 at 0x00000185C6958FE0> Feature Map Shape Are (10222, 2048)


In [13]:
# Extract features using InceptionResNetV2
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
# Alias the InceptionResNetV2 preprocessing function before another import
# rebinds `preprocess_input`.
inc_resnet_preprocessor = preprocess_input
inc_resnet_features = get_features(
    model_name=InceptionResNetV2,
    model_preprocessor=inc_resnet_preprocessor,
    input_size=img_size,
    data=X,
)

[1m320/320[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2791s[0m 9s/step
<function InceptionResNetV2 at 0x00000185C6958900> Feature Map Shape Are (10222, 1536)


In [None]:
# Extract features using NASNetLarge 
from keras.applications.nasnet import NASNetLarge, preprocess_input
# Alias the NASNet preprocessing function before another import rebinds
# `preprocess_input`.
nasnet_preprocessor = preprocess_input
nasnet_features = get_features(
    model_name=NASNetLarge,
    model_preprocessor=nasnet_preprocessor,
    input_size=img_size,
    data=X,
)

[1m 39/320[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1:15:59[0m 16s/step

In [None]:
# Extract features using Xception 
from keras.applications.xception import Xception, preprocess_input
# Alias the Xception preprocessing function under its own name.
xception_preprocessor = preprocess_input
xception_features = get_features(
    model_name=Xception,
    model_preprocessor=xception_preprocessor,
    input_size=img_size,
    data=X,
)

In [None]:
# Build the final feature matrix by placing the per-model feature vectors
# side by side. The block order (InceptionV3, Xception, NASNetLarge,
# InceptionResNetV2) must match the order used at prediction time.
final_features = np.concatenate(
    (inception_features, xception_features, nasnet_features, inc_resnet_features),
    axis=-1,  # concatenate along the last (feature) axis
)

print(f'Final feature maps shape {final_features.shape}')

In [None]:
from tensorflow.keras import optimizers

# Classifier head trained on the concatenated feature vectors:
# dropout followed by a single softmax layer.
# An explicit Input object declares the feature width, replacing the
# deprecated `input_shape=` argument on the first layer.
model = Sequential([
    Input(shape=(final_features.shape[1],)),
    Dropout(0.7),
    Dense(n_classes, activation='softmax'),
])

# Create optimizer instance from the configured hyperparameter, so changing
# `learn_rate` actually affects training.
adam = optimizers.Adam(learning_rate=learn_rate)

model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Training the model. validation_split holds out the last 20% of the rows.
history = model.fit(final_features, y,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_split=0.2,
                    callbacks=[lrr, EarlyStop])

In [None]:
# Optional clean-up of the large feature arrays to free RAM (left disabled).
# NOTE(review): a later cell saves `final_features` with np.save, so do not
# delete it before that cell has run.

# del inception_features
# del xception_features
# del nasnet_features
# del inc_resnet_features
# del final_features
# del X
gc.collect()

In [None]:
from tensorflow.keras.preprocessing.image import load_img

def images_to_array_test(test_path, img_size=(331, 331, 3)):
    """
    Read every file in a directory as an image and stack them into one array.

    Parameters:
    - test_path (str): Directory whose entries (in `os.listdir` order) are loaded.
    - img_size (tuple): Target size as (height, width, channels); it is passed to
      `load_img` as `target_size`, and its first two items size the output array.

    Returns:
    - images (numpy.ndarray): uint8 array of shape (num_files, height, width, 3).
    """
    # Full path of every entry in the directory, in listing order
    test_filenames = []
    for fname in os.listdir(test_path):
        test_filenames.append(os.path.join(test_path, fname))

    # Pre-allocate the output; the channel count is fixed at 3
    height, width = img_size[0], img_size[1]
    images = np.zeros((len(test_filenames), height, width, 3), dtype=np.uint8)

    # Fill the array one image at a time
    for ix, img_path in enumerate(tqdm(test_filenames, desc='Loading images')):
        images[ix] = load_img(img_path, target_size=img_size)

    print('Output Data Size:', images.shape)
    return images

In [None]:
# Load every file in the test directory, resized to img_size.
test_data = images_to_array_test(testing_dir, img_size)

In [None]:
#Extract test data features.
def extact_features(data):
    """Extract features with all four backbones and join them per sample.

    Calls ``get_features`` once per backbone, in the order InceptionV3,
    Xception, NASNetLarge, InceptionResNetV2, and concatenates the results
    along the last axis. This is the same order used to build the training
    feature matrix.

    Parameters
    ----------
    data : array-like
        Image batch forwarded unchanged to ``get_features``.

    Returns
    -------
    numpy.ndarray
        The concatenated feature array.
    """
    backbones = (
        (InceptionV3, inception_preprocessor),
        (Xception, xception_preprocessor),
        (NASNetLarge, nasnet_preprocessor),
        (InceptionResNetV2, inc_resnet_preprocessor),
    )
    per_model_features = [
        get_features(backbone, preprocessor, img_size, data)
        for backbone, preprocessor in backbones
    ]

    final_features = np.concatenate(per_model_features, axis=-1)
    print('Final feature maps shape', final_features.shape)

    # Drop the per-model arrays to free RAM; only the joined array is kept.
    del per_model_features
    gc.collect()

    return final_features

test_features = extact_features(test_data)

In [None]:
# Free memory: the raw test images are no longer needed once their features
# (test_features) have been extracted.
del test_data
gc.collect()

In [None]:
import torch

In [None]:
# Debug check: show the class of the trained model object.
print(type(model))


In [None]:
model.save('my_model2.h5')  # Save the model as an HDF5 file

from tensorflow.keras.models import load_model

# # Load the model back from the HDF5 file
model = load_model('my_model2.h5')

In [None]:
#Predict test labels given test data features.

pred = model.predict(test_features)

# For each row take the column with the highest score and map that index back
# to its breed name.
predicted_labels = [classes[class_ix] for class_ix in np.argmax(pred, axis=1)]

In [None]:
# One row per test file, in the same os.listdir order used to load test_data,
# so the predictions line up with the file names.
test_df  = pd.DataFrame(os.listdir(testing_dir),columns  = ['ImageName'])
# These are test images, so their path is under testing_dir (the original
# built the path from training_dir, pointing at files that are not there).
test_df["ImagePath"] = test_df['ImageName'].apply(lambda x: testing_dir + "/" + x)
test_df['predicted_labels'] = predicted_labels
test_df.head()

In [None]:
# Reload the training images in os.listdir order and extract their features.
train_data = images_to_array_test(test_path=training_dir, img_size=img_size)
train_feature = extact_features(train_data)

# Free memory: the raw images are not needed once the features exist.
del train_data
gc.collect()

# Predict training labels from the training features.
pred_train = model.predict(train_feature)

# Highest-scoring class index per row, mapped back to its breed name.
predicted_labels_train = [classes[class_ix] for class_ix in np.argmax(pred_train, axis=1)]

In [None]:
# Persist the concatenated training features to disk as a .npy file.
np.save('final_features.npy', final_features)

In [None]:
# One row per training file: ImagePath keeps the full file name, ImageName is
# reduced to the part before the first "." so it can be joined on the label id.
train_df = pd.DataFrame(os.listdir(training_dir), columns=['ImageName']).assign(
    ImagePath=lambda frame: training_dir + "/" + frame['ImageName'],
    ImageName=lambda frame: frame['ImageName'].str.split(".").str[0],
)

# Attach the ground-truth breed to each file.
label_df = pd.read_csv("./labels.csv").rename(
    columns={"id": "ImageName", "breed": "Actual_Breed"}
)
train_df = pd.merge(train_df, label_df, on='ImageName')

# Predictions were computed in directory-listing order.
train_df['Pred_Breed'] = predicted_labels_train

In [None]:
# Training curves: loss on the left, accuracy on the right. Training values
# are drawn as a line, validation values as red dots.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

for ax, metric, title in zip(axes, ('loss', 'accuracy'), ('Loss', 'Accuracy')):
    ax.plot(history.history[metric], label='Training ' + title)
    ax.plot(history.history['val_' + metric], 'ro', label='Validation ' + title)
    ax.set_title(title)
    ax.grid(True)
    ax.legend()

plt.show()

In [None]:
model.save('my_model2.h5')  # Save the model as an HDF5 file

# from tensorflow.keras.models import load_model

# # Load the model back from the HDF5 file
model = load_model('my_model2.h5')

In [None]:
# Best validation accuracy reached over all training epochs.
max(history.history['val_accuracy'])

In [None]:
#사진 예측
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.applications.inception_v3 import preprocess_input as inception_preprocessor
from tensorflow.keras.applications.xception import preprocess_input as xception_preprocessor
from tensorflow.keras.applications.nasnet import preprocess_input as nasnet_preprocessor
from tensorflow.keras.applications.inception_resnet_v2 import preprocess_input as inc_resnet_preprocessor
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input
from keras.applications.nasnet import NASNetLarge, preprocess_input
from keras.applications.xception import Xception, preprocess_input
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import (Input, Lambda , Dense, Flatten, 
                                     ReLU, LeakyReLU, PReLU, BatchNormalization,
                                    Conv2D, MaxPool2D, Dropout, 
                                     GlobalAveragePooling2D)

from tensorflow.keras.models import (Model, Sequential)

# Reload the label table to rebuild the class list used during training.
labels = pd.read_csv('./labels.csv')

# Alphabetically sorted list of the distinct breed names.
classes = sorted(labels['breed'].unique())

# Load the trained classifier from the HDF5 file.
model = load_model(filepath='my_model2.h5')

def get_features(model_name, model_preprocessor, input_size, data):
    """Run `data` through a pretrained backbone and return pooled features.

    Parameters
    ----------
    model_name : callable
        Model constructor; it is called with ``weights='imagenet'``,
        ``include_top=False`` and ``input_shape=input_size``.
    model_preprocessor : callable
        Preprocessing function applied to the raw input inside the model
        through a ``Lambda`` layer.
    input_size : tuple
        Shape of a single input sample, passed to ``Input``.
    data : array-like
        Batch of inputs handed to ``Model.predict``.

    Returns
    -------
    numpy.ndarray
        Output of the ``GlobalAveragePooling2D`` layer for every sample.
    """
    input_layer = Input(input_size)
    preprocessor = Lambda(model_preprocessor)(input_layer)
    base_model   = model_name(weights='imagenet', include_top=False,
                            input_shape = input_size)(preprocessor)
    avg = GlobalAveragePooling2D()(base_model)
    feature_extractor = Model(inputs = input_layer, outputs = avg)

    #Extract feature.
    feature_maps = feature_extractor.predict(data, verbose=1)

    # Log the constructor's name instead of its repr
    # ("<function InceptionV3 at 0x...>").
    display_name = getattr(model_name, "__name__", str(model_name))
    print("{0} Feature Map Shape Are {1}".format(display_name, feature_maps.shape))
    return feature_maps

def preprocess_image(image_path, target_size):
    """Load an image, resize it to `target_size`, and return it as a batch of one.

    The array produced by `img_to_array` gets a new leading axis so it can be
    passed directly to a model's `predict`.
    """
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    return pixels[np.newaxis, ...]

def extract_and_combine_features(image_path):
    """Extract features from one image with the pretrained models and join them.

    The image is loaded at 331x331x3 and passed through InceptionV3, Xception,
    NASNetLarge and InceptionResNetV2, in that order. The per-model feature
    vectors are then concatenated along the last axis.
    """
    # Image loading / preprocessing
    img_size = (331, 331, 3)
    img = preprocess_image(image_path, target_size=img_size)

    # Feature extraction, one backbone at a time
    backbones = (
        (InceptionV3, inception_preprocessor),
        (Xception, xception_preprocessor),
        (NASNetLarge, nasnet_preprocessor),
        (InceptionResNetV2, inc_resnet_preprocessor),
    )
    per_model_features = [
        get_features(backbone, preprocessor, img_size, img)
        for backbone, preprocessor in backbones
    ]

    # Feature combination
    return np.concatenate(per_model_features, axis=-1)

def predict_breed(image_path):
    """Predict the breed name for a new image file.

    Extracts the combined features for the image, scores them with the loaded
    `model`, and returns the entry of `classes` with the highest score.
    """
    combined_features = extract_and_combine_features(image_path)
    scores = model.predict(combined_features)

    # The batch holds a single image, so take the arg-max of row 0.
    best_index = np.argmax(scores, axis=1)[0]
    return classes[best_index]

# Run a prediction on a new image
image_path = 'dingo.jpg'
predicted_breed = predict_breed(image_path)
print(f"Predicted breed: {predicted_breed}")
