<a href="https://colab.research.google.com/github/GalBuzi/colab/blob/main/vgg16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
from os import listdir, makedirs
from os.path import join, exists, expanduser
from tqdm import tqdm
from sklearn.metrics import log_loss, accuracy_score
from keras.preprocessing import image
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
from keras.applications import xception
from keras.applications import inception_v3
from keras.applications.vgg16 import preprocess_input, decode_predictions
from sklearn.linear_model import LogisticRegression

from keras.models import Sequential
from keras.layers import Dense, Dropout, Lambda, Flatten
from keras.optimizers import Adam, RMSprop
from sklearn.model_selection import train_test_split
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator, load_img
import random
import cv2
from keras.models import Sequential
from keras import layers
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation,GlobalMaxPooling2D
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.applications import VGG16
from keras.models import Model
from google.colab import files
import io
from google.colab import drive
# Mount Google Drive so the dataset stored under "My Drive" is reachable.
drive.mount('gdrive')

# Read the labels table; the context manager closes the handle once pandas
# has parsed it (the name `csv_file` is kept for later cells).
with open('gdrive/My Drive/dog-breed-identification/labels.csv') as csv_file:
    label = pd.read_csv(csv_file)

# Per-breed image counts, most frequent first.
label_df = pd.DataFrame(label['breed'].value_counts()).reset_index()
label_df.columns = ['breed_name', 'count']
label_df = label_df.sort_values(by="count", ascending=False)

# Keep only rows whose breed appears in the count table. `.copy()` makes the
# result an independent frame so the column assignment below does not write
# into a slice of the original.
label = label[label['breed'].isin(label_df['breed_name'])].copy()

# adding jpg ext.: image files on disk are named "<id>.jpg"
label['id_ext'] = label['id'] + '.jpg'
label = label.reset_index(drop=True).drop(columns=['id'])

# one hot: one indicator column per breed, named after the breed itself
label_onehot = pd.get_dummies(label, columns=['breed'], prefix=None)
print(label_onehot.columns)
# remove breed_ prefix (plain substring replacement, not a regex)
label_onehot.columns = label_onehot.columns.str.replace('breed_', '', regex=False)
label_onehot = label_onehot.rename(columns={'id_ext': 'id'})

# Load one random training image as a sanity check that the paths resolve.
sample = random.choice(label_onehot['id'])
# NOTE(review): this rebinds `image`, shadowing the keras.preprocessing
# `image` module imported at the top of the file. The name is kept in case
# later cells rely on it.
image = load_img('gdrive/My Drive/dog-breed-identification/train/' + sample)


In [None]:
def save_model(model):
    """Persist *model* into the current working directory.

    The architecture returned by ``model.to_json()`` is written to
    ``model.json`` and the weights are stored via ``model.save_weights`` in
    ``model.h5``. Existing files with those names are overwritten.
    """
    # Serialise first so a failure here does not leave an empty model.json.
    architecture = model.to_json()
    with open("model.json", "w") as out:
        out.write(architecture)
    model.save_weights("model.h5")

def load_model():
    """Rebuild a model from ``model.json`` and ``model.h5``.

    Reads the JSON architecture from ``model.json`` in the current working
    directory, reconstructs the model from it and loads the weights stored
    in ``model.h5``.

    Returns:
        The reconstructed model with its weights loaded. This function does
        not call ``compile`` on it.

    Raises:
        OSError: if ``model.json`` cannot be opened.
    """
    # `model_from_json` is not imported at module level, so import it here;
    # without this the function fails with NameError on first use.
    from keras.models import model_from_json

    # The context manager closes the file even if reading fails.
    with open('model.json', 'r') as json_file:
        loaded_model_json = json_file.read()

    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights("model.h5")
    return loaded_model

def confusion_matrix_predict(model, x_test, y_true=None):
    """Print and plot the confusion matrix of *model* on a test set.

    Args:
        model: object exposing ``predict(x_test)`` that returns one row of
            class scores per sample.
        x_test: inputs forwarded unchanged to ``model.predict``.
        y_true: true class indices, one per sample. When omitted, the
            module-level ``y_test`` is used, as the original implementation
            did.

    Prints the confusion matrix and the accuracy (in percent), then draws
    the matrix as a heat map on the current matplotlib figure.
    """
    # `confusion_matrix` is not imported at module level; scikit-learn is
    # already a dependency of this file, so import it locally.
    from sklearn.metrics import confusion_matrix

    if y_true is None:
        # Backward-compatible fallback to the global used originally.
        y_true = y_test

    preds = model.predict(x_test)
    # Predicted category of each sample = index of its highest score.
    predicts = np.argmax(preds, axis=1)

    # Compute the matrix once and reuse it for printing and plotting.
    matrix = confusion_matrix(y_true, predicts)
    print(matrix)
    print('model accuracy on test set is: {}%'.format(accuracy_score(y_true, predicts)*100))

    # seaborn is never imported in this file, so draw the heat map with
    # matplotlib, which is.
    plt.imshow(matrix, cmap='Greens', interpolation='nearest', aspect='auto')
    plt.colorbar()
    plt.xlabel('Prediction')
    plt.ylabel('True label')
    plt.title('Classification results on test set')
    print()

def plot_history(history):
    """Show the accuracy and loss curves recorded during training.

    *history* must expose a ``history`` mapping with the keys ``accuracy``,
    ``val_accuracy``, ``loss`` and ``val_loss``. Two figures are shown in
    turn, accuracy first and then loss, each with the training series and
    the validation series (labelled 'Test' in the legend).
    """
    curves = history.history
    panels = (
        ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model loss', 'Loss'),
    )
    for train_key, val_key, heading, y_label in panels:
        plt.plot(curves[train_key])
        plt.plot(curves[val_key])
        plt.title(heading)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Test'], loc='upper left')
        plt.show()

In [None]:

# Train and Test split: hold out 10% of the labelled rows for validation.
# reset_index() renumbers the rows of each part from 0 and keeps the previous
# row labels in an extra "index" column.
train_df, validate_df = (
    part.reset_index()
    for part in train_test_split(label_onehot, test_size=0.1)
)

# Row counts, used later to derive the number of steps per epoch.
total_train, total_validate = len(train_df), len(validate_df)

print(train_df.shape, validate_df.shape)

# Square RGB input fed to the network.
image_size = 224
input_shape = (image_size,) * 2 + (3,)

# Training schedule.
epochs, batch_size = 50, 32

In [None]:

# Convolutional base: VGG16 with ImageNet weights and no classifier head.
pre_trained_model = VGG16(input_shape=input_shape, include_top=False, weights="imagenet")

# Freeze the first 15 layers and fine-tune the remaining ones.
# NOTE(review): for Keras' VGG16 without top this split should leave only
# block5 trainable; confirm against the summary printed below.
for layer in pre_trained_model.layers[:15]:
    layer.trainable = False

for layer in pre_trained_model.layers[15:]:
    layer.trainable = True

last_layer = pre_trained_model.get_layer('block5_pool')
last_output = last_layer.output

# Target columns: one per distinct breed, in order of first appearance.
y_columns = list(label['breed'].unique())

# Collapse each feature map to its maximum, giving one vector per image
x = GlobalMaxPooling2D()(last_output)
# Add a fully connected layer with 512 hidden units and ReLU activation
x = Dense(512, activation='relu')(x)
# Add a dropout rate of 0.5
x = Dropout(0.5)(x)
# Add a final softmax layer with one output per breed
x = layers.Dense(len(y_columns), activation='softmax')(x)

model = Model(pre_trained_model.input, x)

# `learning_rate` replaces the deprecated `lr` argument name.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(learning_rate=1e-4, momentum=0.9),
              metrics=['accuracy'])

model.summary()

In [None]:

# Data Augmentation: random geometric perturbations for the training images;
# pixel values are rescaled by 1/255.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Both generators read from the same folder with the same settings; only the
# dataframe and the ImageDataGenerator differ.
_images_dir = r'gdrive/My Drive/dog-breed-identification/train'
_flow_options = dict(
    x_col='id',
    y_col=y_columns,
    class_mode='raw',
    target_size=(image_size, image_size),
    batch_size=batch_size,
)

train_generator = train_datagen.flow_from_dataframe(
    train_df, _images_dir, **_flow_options
)

validation_generator = validation_datagen.flow_from_dataframe(
    validate_df, _images_dir, **_flow_options
)




In [None]:
# Train on the augmented generator. `Model.fit` accepts generators directly,
# so the deprecated `fit_generator` is not needed.
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=total_validate//batch_size,
    steps_per_epoch=total_train//batch_size)


# Evaluate on the validation generator. `steps` must be passed by keyword
# because the second positional argument of `evaluate` is `y`.
loss, accuracy = model.evaluate(
    validation_generator,
    steps=total_validate//batch_size,
    workers=12)
print("Test: accuracy = %f  ;  loss = %f " % (accuracy, loss))


plot_history(history)

# The confusion matrix needs image batches in a fixed order plus the matching
# integer labels. The original call passed `validate_df` (file names and
# one-hot columns) straight to `model.predict` and relied on an undefined
# `y_test`. Build an unshuffled generator so predictions line up row-for-row
# with `validate_df`, and derive `y_test` from the one-hot columns.
# NOTE(review): assumes flow_from_dataframe drops no rows (every listed file
# exists); otherwise predictions and labels would differ in length.
eval_generator = validation_datagen.flow_from_dataframe(
    validate_df,
    r'gdrive/My Drive/dog-breed-identification/train',
    x_col='id',
    y_col=y_columns,
    class_mode='raw',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=False
)
y_test = np.argmax(validate_df[y_columns].values, axis=1)
confusion_matrix_predict(model, eval_generator)

save_model(model)