In [2]:
import pandas as pd
import numpy as np
# from pandas_profiling import ProfileReport

import tensorflow as tf
config = tf.compat.v1.ConfigProto(gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))
sess = tf.compat.v1.Session(config=config)
import tensorflow_hub as hub

from keras.preprocessing.image import ImageDataGenerator

from sklearn.model_selection import train_test_split
from IPython.display import display, Image

import matplotlib.pyplot as plt
from matplotlib.pyplot import imread

import os
import re

In [3]:
print("TF version:", tf.__version__)
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(physical_devices))

TF version: 2.4.1
Num GPUs: 1


In [4]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [5]:
# Import Labels
labels = pd.read_csv("labels.csv")
labels.head()

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever


In [6]:
labels.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10222 entries, 0 to 10221
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      10222 non-null  object
 1   breed   10222 non-null  object
dtypes: object(2)
memory usage: 159.8+ KB


In [7]:
unique_breeds = labels['breed'].unique().shape[0]
print("Unique breeds:", unique_breeds)

Unique breeds: 120


In [8]:
labels['id'] = labels['id'] + '.jpg'

In [9]:
train, val = train_test_split(labels, test_size=0.2, shuffle=True)

In [10]:
print("Train shape:", train.shape)
print("Validation shape:", val.shape)

Train shape: (8177, 2)
Validation shape: (2045, 2)


In [11]:
# Datagen 

train_datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
)

validation_datagen = ImageDataGenerator(rescale=1./255)

test_datagen = ImageDataGenerator(rescale=1/255.)


In [12]:
# Training / Validation set

training_set = train_datagen.flow_from_dataframe(
    dataframe=train,
    directory='train/',
    x_col="id",
    y_col="breed",
    target_size=(150, 150),
    class_mode="categorical",
    batch_size=128
)

validation_set = validation_datagen.flow_from_dataframe(
    dataframe=val,
    directory='train/',
    x_col="id",
    y_col="breed",
    target_size=(150, 150),
    class_mode="categorical",
    batch_size=128
)

test_set = test_datagen.flow_from_directory (
    '',
    target_size = (150,150),
    batch_size = 128,
    classes=['test']
)

Found 8177 validated image filenames belonging to 120 classes.
Found 2045 validated image filenames belonging to 120 classes.
Found 10357 images belonging to 1 classes.


In [13]:

model_cnn = tf.keras.models.Sequential()
model_cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[150, 150, 3]))
model_cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
model_cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'))
model_cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))
model_cnn.add(tf.keras.layers.Flatten())
model_cnn.add(tf.keras.layers.Dense(units=64, activation='relu'))
model_cnn.add(tf.keras.layers.Dense(units=unique_breeds, activation='softmax'))

model_cnn.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

model_cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 148, 148, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 41472)             0         
_________________________________________________________________
dense (Dense)                (None, 64)                2654272   
_________________________________________________________________
dense_1 (Dense)              (None, 120)               7

In [14]:
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=2),
    tf.keras.callbacks.ModelCheckpoint(filepath='model.{epoch:02d}-{val_loss:.2f}.h5'),
    tf.keras.callbacks.TensorBoard(log_dir='./logs'),
]

In [15]:
model_cnn.fit(x = training_set, validation_data = validation_set, callbacks=[callbacks], verbose=1,
              epochs = 1)



<tensorflow.python.keras.callbacks.History at 0x20ad4fce250>

In [16]:
y_pred = model_cnn.predict(test_set)

In [17]:
submission = pd.read_csv('sample_submission.csv')
submission

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
1,00102ee9d8eb90812350685311fe5890,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
2,0012a730dfa437f5f3613fb75efcd4ce,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
3,001510bc8570bbeee98c8d80c8a95ec1,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
4,001a5f3114548acdefa3d4da05474c2e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10352,ffeda8623d4eee33c6d1156a2ecbfcf8,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10353,fff1ec9e6e413275984966f745a313b0,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10354,fff74b59b758bbbf13a5793182a9bbe4,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10355,fff7d50d848e8014ac1e9172dc6762a3,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333


In [18]:

file_list = test_set.filenames
id_list = []
for name in file_list:
    m = re.sub('test/', '', name)
    m = re.sub('.jpg', '', m)
    id_list.append(m)

In [19]:
submission['id'] = id_list
submission.iloc[:,1:] = y_pred
submission

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,test\000621fb3cbb32d8935728e48679680e,0.008213,0.008550,0.008663,0.007178,0.010209,0.006338,0.007305,0.009026,0.008591,...,0.008880,0.007324,0.007750,0.009205,0.007664,0.008010,0.008240,0.008419,0.006475,0.008284
1,test\00102ee9d8eb90812350685311fe5890,0.008442,0.008362,0.008552,0.008253,0.008628,0.008018,0.008087,0.008503,0.008466,...,0.008407,0.008009,0.008188,0.008651,0.008257,0.008147,0.008320,0.008408,0.007807,0.008191
2,test\0012a730dfa437f5f3613fb75efcd4ce,0.008163,0.008666,0.008356,0.007827,0.009043,0.007481,0.008007,0.008865,0.008600,...,0.008716,0.007879,0.008059,0.008627,0.007991,0.008259,0.008157,0.008388,0.007527,0.008609
3,test\001510bc8570bbeee98c8d80c8a95ec1,0.008300,0.008527,0.008432,0.007876,0.008783,0.007590,0.008068,0.008688,0.008385,...,0.008544,0.007770,0.008049,0.008453,0.008168,0.008245,0.008374,0.008444,0.007652,0.008382
4,test\001a5f3114548acdefa3d4da05474c2e,0.008126,0.008777,0.008396,0.007577,0.009041,0.007234,0.008009,0.008823,0.008272,...,0.008619,0.007543,0.008001,0.008286,0.008152,0.008280,0.008326,0.008388,0.007523,0.008671
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10352,test\ffeda8623d4eee33c6d1156a2ecbfcf8,0.008256,0.008479,0.008461,0.007527,0.009294,0.007055,0.007676,0.008872,0.008512,...,0.008793,0.007521,0.007847,0.008869,0.007898,0.008079,0.008363,0.008473,0.007126,0.008325
10353,test\fff1ec9e6e413275984966f745a313b0,0.008438,0.008189,0.008723,0.007538,0.009443,0.006951,0.007381,0.008773,0.008640,...,0.008807,0.007459,0.007755,0.009419,0.007836,0.007767,0.008420,0.008567,0.006867,0.007858
10354,test\fff74b59b758bbbf13a5793182a9bbe4,0.008075,0.008458,0.008279,0.007493,0.009397,0.007053,0.007693,0.008905,0.008574,...,0.008911,0.007563,0.007810,0.009003,0.007757,0.007964,0.008259,0.008546,0.007119,0.008403
10355,test\fff7d50d848e8014ac1e9172dc6762a3,0.008329,0.008371,0.008456,0.008111,0.008757,0.007903,0.007969,0.008719,0.008758,...,0.008643,0.008050,0.008052,0.008918,0.008029,0.008132,0.008166,0.008474,0.007692,0.008223


In [21]:
final = submission.set_index('id')
final.to_csv('submission.csv')

In [22]:
final.head()

Unnamed: 0_level_0,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,beagle,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
test\000621fb3cbb32d8935728e48679680e,0.008213,0.00855,0.008663,0.007178,0.010209,0.006338,0.007305,0.009026,0.008591,0.010198,...,0.00888,0.007324,0.00775,0.009205,0.007664,0.00801,0.00824,0.008419,0.006475,0.008284
test\00102ee9d8eb90812350685311fe5890,0.008442,0.008362,0.008552,0.008253,0.008628,0.008018,0.008087,0.008503,0.008466,0.009027,...,0.008407,0.008009,0.008188,0.008651,0.008257,0.008147,0.00832,0.008408,0.007807,0.008191
test\0012a730dfa437f5f3613fb75efcd4ce,0.008163,0.008666,0.008356,0.007827,0.009043,0.007481,0.008007,0.008865,0.0086,0.008929,...,0.008716,0.007879,0.008059,0.008627,0.007991,0.008259,0.008157,0.008388,0.007527,0.008609
test\001510bc8570bbeee98c8d80c8a95ec1,0.0083,0.008527,0.008432,0.007876,0.008783,0.00759,0.008068,0.008688,0.008385,0.009042,...,0.008544,0.00777,0.008049,0.008453,0.008168,0.008245,0.008374,0.008444,0.007652,0.008382
test\001a5f3114548acdefa3d4da05474c2e,0.008126,0.008777,0.008396,0.007577,0.009041,0.007234,0.008009,0.008823,0.008272,0.009037,...,0.008619,0.007543,0.008001,0.008286,0.008152,0.00828,0.008326,0.008388,0.007523,0.008671
