In [1]:
from google.colab import drive
drive.mount('/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /gdrive


In [2]:
import zipfile

print ("Uncompressing zip file")
zip_ref = zipfile.ZipFile('/gdrive/My Drive/Colab Notebooks/'
                          'Intell Scene Identification Challenge/train-scene classification.zip', 'r')
zip_ref.extractall('train_scene_classification/')
zip_ref.close()
print("Finished")

Uncompressing zip file
Finished


In [0]:
!pip install -U -q h5py

In [4]:
import pandas as pd
import numpy as np

import cv2 as cv

from keras.utils import to_categorical, Sequence

from keras.layers import (Dense, Flatten, Add, BatchNormalization,
                          Conv2D, MaxPooling2D, AveragePooling2D,
                          Input, Activation, Flatten, Dropout)

from keras.models import Model

from keras.optimizers import Adadelta
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

from sklearn.model_selection import RepeatedKFold

Using TensorFlow backend.


In [0]:
IMAGES_PATH = "train_scene_classification/train/"

train = pd.read_csv("train_scene_classification/train.csv")

test = pd.read_csv("/gdrive/My Drive/Colab Notebooks/"
                   "Intell Scene Identification Challenge/test_WyRytb0.csv")

ids = train['image_name']
labels = train['label']

In [0]:
class DataGenerator(Sequence):
  
  """ DataGenerator Class,
      Generate images and labels on the go
      Less memory
      Fast
  """
  
  def __init__(self, list_IDs, image_names, labels,
               batch_size=64, width = 150, height=150,
               num_channels=3, num_classes=6, shuffle=False):
    
    """ Initialize parameters,
    """
    
    self.list_IDs = list_IDs
    self.image_names = image_names
    self.labels = labels
    self.batch_size = batch_size
    self.width = width
    self.height = height
    self.num_channels = num_channels
    self.num_classes = num_classes
    self.shuffle = shuffle
    self.on_epoch_end()
    
  def on_epoch_end(self):
    
    """ Updates indexes after each epoch
    """
    
    self.indexes = np.arange(len(self.list_IDs))
    if self.shuffle == True:
      np.random.shuffle(self.indexes)
      
  def get_input(self, ID):
    
    """ Read the image from the ID and return image
    """
    
    img = cv.imread(IMAGES_PATH+self.image_names[ID])
    if img.shape[0] != self.width or img.shape[1] != self.height:
      img = cv.resize(img, (self.width, self.height), interpolation = cv.INTER_CUBIC)
    img = img / 255
    return img
      
  def __data_generation(self, list_IDs_temp):
    
    """ Generates batch of X and y
    """
    
    X = np.zeros((self.batch_size, self.width, self.height, self.num_channels))
    y = np.zeros((self.batch_size, self.num_classes))
    
    for i, ID in enumerate(list_IDs_temp):
      
      X[i,] = self.get_input(ID)
      lbl = to_categorical(self.labels[ID], num_classes=self.num_classes)
      y[i,] = lbl
    return X, y  
  
  def __len__(self):
    
    """ Denotes the number of batches per epoch
    """
    
    return int(np.floor(len(self.list_IDs) / self.batch_size))
  
  def __getitem__(self, index):
    
    """ Generate one batch of data
    """
    
    indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
    list_IDs_temp = [self.list_IDs[k] for k in indexes]
    X, y = self.__data_generation(list_IDs_temp)
    return X, y

In [9]:
input_img = Input(shape=(150, 150, 3))

conv1 = Conv2D(16, kernel_size=(3,3), padding='same')(input_img)
conv1 = Conv2D(16, kernel_size=(3,3), padding='same')(conv1)
bn1 = BatchNormalization()(conv1)
act1 = Activation('relu')(bn1)
max_pool1 = MaxPooling2D(pool_size=(2,2), strides=(2,2))(act1)

conv2 = Conv2D(32, kernel_size=(3,3), padding='same')(max_pool1)
conv2 = Conv2D(32, kernel_size=(3,3), padding='same')(conv2)
bn2 = BatchNormalization()(conv2)
act2 = Activation('relu')(bn2)
max_pool2 = MaxPooling2D(pool_size=(2,2), strides=(2,2))(act2)

conv3 = Conv2D(64, kernel_size=(3,3), padding='same')(max_pool2)
conv3 = Conv2D(64, kernel_size=(3,3), padding='same')(conv3)
bn3 = BatchNormalization()(conv3)
act3 = Activation('relu')(bn3)
max_pool3 = MaxPooling2D(pool_size=(2,2), strides=(2,2))(act3)

conv4 = Conv2D(128, kernel_size=(3,3), padding='same')(max_pool3)
conv4 = Conv2D(128, kernel_size=(3,3), padding='same')(conv4)
bn4 = BatchNormalization()(conv4)
act4 = Activation('relu')(bn4)
max_pool4 = MaxPooling2D(pool_size=(2,2), strides=(2,2))(act4)

conv5 = Conv2D(256, kernel_size=(3,3), padding='same')(max_pool4)
conv5 = Conv2D(256, kernel_size=(3,3), padding='same')(conv5)
bn5 = BatchNormalization()(conv5)
act5 = Activation('relu')(bn5)
max_pool5 = MaxPooling2D(pool_size=(2,2), strides=(2,2))(act5)

conv6 = Conv2D(512, kernel_size=(3,3), padding='same')(max_pool5)
conv6 = Conv2D(512, kernel_size=(3,3), padding='same')(conv6)
bn6 = BatchNormalization()(conv6)
act6 = Activation('relu')(bn6)
max_pool6 = MaxPooling2D(pool_size=(2,2), strides=(2,2))(act6)

flat = Flatten()(max_pool6)

fc1 = Dense(700, activation='relu')(flat)
drop1 = Dropout(0.25)(fc1)
fc2 = Dense(700, activation='relu')(drop1)
drop2 = Dropout(0.25)(fc2)

output_label = Dense(6, activation='sigmoid')(drop2)

model = Model(inputs=[input_img], outputs=[output_label])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 150, 150, 16)      448       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 150, 150, 16)      2320      
_________________________________________________________________
batch_normalization_1 (Batch (None, 150, 150, 16)      64        
_________________________________________________________________
activation_1 (Activation)    (None, 150, 150, 16)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 75, 75, 16)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 75, 75, 32)        4640      
__________

In [0]:
adadelta = Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
model.compile(optimizer=adadelta, loss='binary_crossentropy',
              metrics=['categorical_accuracy', 'binary_accuracy'])

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                              patience=5, min_lr=0.0001, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=8,
                               verbose=1, mode='auto')
checkpointer = ModelCheckpoint(filepath='weights.{epoch:02d}-{val_loss:.3f}.hdf5',
                               verbose=1, save_best_only=True)

In [0]:
all_ids = np.arange(0, len(ids))
train_ids = all_ids[0:14600]
validate_ids = all_ids[14600:]

In [12]:
train_generator = DataGenerator(train_ids, ids, labels, batch_size=64)
validate_generator = DataGenerator(validate_ids, ids, labels, batch_size=64)

history = model.fit_generator(generator=train_generator, epochs=30,
                                     verbose=1,
                                     callbacks=[reduce_lr, early_stopping, checkpointer],
                                     validation_data = validate_generator,
                                     use_multiprocessing = True, workers=8)

Epoch 1/30


Epoch 00001: val_loss improved from inf to 0.64971, saving model to weights.01-0.650.hdf5
Epoch 2/30

Epoch 00002: val_loss improved from 0.64971 to 0.40651, saving model to weights.02-0.407.hdf5
Epoch 3/30

Epoch 00003: val_loss improved from 0.40651 to 0.31746, saving model to weights.03-0.317.hdf5
Epoch 4/30

Epoch 00004: val_loss improved from 0.31746 to 0.18484, saving model to weights.04-0.185.hdf5
Epoch 5/30

Epoch 00005: val_loss improved from 0.18484 to 0.14960, saving model to weights.05-0.150.hdf5
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.14960
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.14960
Epoch 8/30
Epoch 8/30

Epoch 00008: val_loss did not improve from 0.14960
Epoch 9/30

Epoch 00008: val_loss did not improve from 0.14960
Epoch 00009: val_loss did not improve from 0.14960
Epoch 10/30

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.1.

Epoch 00010: val_loss did not improve from 0.14960
Epoch 11/30

Epoch 00011: val_lo

In [13]:
!ls

sample_data		    weights.02-0.407.hdf5  weights.05-0.150.hdf5
train_scene_classification  weights.03-0.317.hdf5  weights.11-0.086.hdf5
weights.01-0.650.hdf5	    weights.04-0.185.hdf5  weights.13-0.085.hdf5


In [0]:
from keras.models import load_model
model = load_model("weights.13-0.085.hdf5")

In [0]:
test_ids = test['image_name']

In [0]:
y_pred = np.zeros((len(test_ids), 6))

for i, ID in enumerate(test_ids):

  img = np.zeros((1, 150, 150, 3))
  img_temp = cv.imread(IMAGES_PATH+ID)
  if img_temp.shape[0] != 150 or img_temp.shape[1] != 150:
    img_temp = cv.resize(img_temp, (150, 150), interpolation = cv.INTER_CUBIC)
  img[0,] = img_temp/255
  y_pred[i,] = model.predict(img)

In [0]:
y_sub = np.zeros((len(test_ids)), dtype=np.int8)

for i, lbl in enumerate(y_pred):
  
  y_sub[i] = np.argmax(lbl)

In [0]:
submission = pd.DataFrame({'image_name':test_ids,
                           'label':y_sub})
submission.to_csv('submission_two.csv', index=None)

In [0]:
from google.colab import files
files.download('submission_two.csv')