In [None]:
import numpy as np                          # linear algebra
import os                                   # used for loading the data
from sklearn.metrics import confusion_matrix# confusion matrix to carry out error analysis
import seaborn as sn                        # heatmap
from sklearn.utils import shuffle           # shuffle the data
import matplotlib.pyplot as plt             # 2D plotting library
import cv2                                  # image processing library
import tensorflow as tf      

In [None]:
# Scene categories; a category's position in this list is its integer label.
class_names = ['mountain', 'street', 'glacier', 'buildings', 'sea', 'forest']
# Lookup from category name to integer label (mountain -> 0, ..., forest -> 5).
class_names_label = {name: idx for idx, name in enumerate(class_names)}
# Number of distinct categories.
nb_classes = len(class_names)

In [None]:
def load_data(base_dir='../input/intel-image-classification/', size=(150, 150)):
    """Load the training and test images together with their integer labels.

    Each split directory is expected to contain one sub-folder per class, named
    after a key of ``class_names_label``. Every readable image is converted to
    RGB, resized to ``size`` and collected; each split is shuffled before being
    turned into arrays.

    Parameters
    ----------
    base_dir : str
        Directory that contains ``seg_train/seg_train`` and ``seg_test/seg_test``.
    size : tuple of int
        Target ``(width, height)`` passed to ``cv2.resize``.

    Returns
    -------
    list
        ``[(train_images, train_labels), (test_images, test_labels)]`` where the
        images are ``float32`` arrays and the labels are ``int32`` arrays.
    """
    datasets = ['seg_train/seg_train', 'seg_test/seg_test']
    output = []

    for dataset in datasets:
        directory = os.path.join(base_dir, dataset)
        images = []
        labels = []
        for folder in os.listdir(directory):
            folder_path = os.path.join(directory, folder)
            # Ignore stray files and folders that are not a known class instead
            # of failing with a KeyError on the label lookup.
            if folder not in class_names_label or not os.path.isdir(folder_path):
                continue
            curr_label = class_names_label[folder]
            for file in os.listdir(folder_path):
                curr_img = cv2.imread(os.path.join(folder_path, file))
                # cv2.imread returns None for files it cannot decode; skip them
                # rather than crashing inside cv2.resize.
                if curr_img is None:
                    continue
                # OpenCV loads colour images in BGR order; convert to RGB so the
                # images are displayed with correct colours by plt.imshow.
                curr_img = cv2.cvtColor(curr_img, cv2.COLOR_BGR2RGB)
                curr_img = cv2.resize(curr_img, size)
                images.append(curr_img)
                labels.append(curr_label)
        # Shuffle images and labels together so the pairs stay aligned.
        images, labels = shuffle(images, labels)
        images = np.array(images, dtype='float32')
        labels = np.array(labels, dtype='int32')

        output.append((images, labels))

    return output

In [None]:
# load_data() returns the training split followed by the test split.
train_split, test_split = load_data()
train_images, train_labels = train_split
test_images, test_labels = test_split
# Drop the temporary tuples so they do not keep extra references to the arrays.
del train_split, test_split

In [None]:
# Report how many examples each split holds and the shape of a single image.
n_train = train_labels.shape[0]
n_test = test_labels.shape[0]
image_shape = train_images.shape[1:]  # everything after the sample axis
print ("Number of training examples: " + str(n_train))
print ("Number of testing examples: " + str(n_test))
print ("Each image is of size: " + str(image_shape))

In [None]:
# Count the training images per class. minlength keeps one slot per entry of
# class_names even when the highest-numbered classes have no samples, so the
# number of wedges always matches the labels passed to plt.pie.
sizes = np.bincount(train_labels, minlength=len(class_names))
explode = (0,) * len(class_names)  # no wedge is pulled out of the pie
plt.pie(sizes, explode=explode, labels=class_names, autopct='%1.1f%%', shadow=True, startangle=150)
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.title('Proportion of each class')

plt.show()

In [None]:
# Normalize the data by scaling the pixel values into the range 0-1.
# NOTE: this cell rebinds the arrays from themselves, so running it a second
# time without reloading the data would divide the values again.
MAX_PIXEL_VALUE = 255.0
train_images = train_images / MAX_PIXEL_VALUE
test_images = test_images / MAX_PIXEL_VALUE

In [None]:
# Look at one randomly chosen scaled and resized training image.
index = np.random.randint(train_images.shape[0])
fig, ax = plt.subplots()
ax.imshow(train_images[index])
ax.grid(False)
# The title shows the sample index followed by its class name.
ax.set_title('Image #{} : '.format(index) + class_names[train_labels[index]])
plt.show()

In [None]:
# 5x5 gallery of the first 25 training images, each captioned with its class.
fig = plt.figure(figsize=(10,10))
fig.suptitle("Some examples of images of the dataset", fontsize=16)
for i in range(25):
    ax = fig.add_subplot(5, 5, i + 1)
    # Hide ticks and grid lines; only the picture and its caption matter here.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(train_images[i], cmap=plt.cm.binary)
    ax.set_xlabel(class_names[train_labels[i]])
plt.show()

In [None]:
# Building the model: four convolution + max-pooling stages followed by a
# dense classifier that outputs one softmax probability per class.
layers = tf.keras.layers

model = tf.keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D(pool_size=2, strides=2),
    # NOTE(review): 65 filters looks like a typo for 64 - confirm before changing.
    layers.Conv2D(65, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.Dropout(0.5),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=2, strides=2),
    layers.Flatten(),
    layers.Dropout(0.5),
    layers.Dense(512, activation='relu'),
    # Six output units, one per scene category.
    layers.Dense(6, activation=tf.nn.softmax),
])
model.summary()

In [None]:
# Sparse categorical cross-entropy is used because the labels are integer
# class ids. The metric name 'acc' determines the 'acc' / 'val_acc' history
# keys that the learning-curve plots read.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Train for 32 epochs in mini-batches of 64, holding out 20% of the training
# data as the validation (dev) set.
history = model.fit(
    x=train_images,
    y=train_labels,
    batch_size=64,
    epochs=32,
    validation_split=0.2,
)

In [None]:
# Learning curves: accuracy (left) and loss (right) per epoch for the training
# data and the held-out dev (validation) data.
fig = plt.figure(figsize=(15,10))
plt.subplot(221)
plt.plot(history.history['acc'], 'b-', label = 'train accuracy')
plt.plot(history.history['val_acc'], 'r-', label = 'dev accuracy')
plt.title('train accuracy VS dev accuracy')
# The history lists hold one value per epoch, so epochs are on the x axis.
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()

plt.subplot(222)
plt.plot(history.history['loss'], 'b-', label = 'train loss')
# 'val_loss' is the dev-set loss, so it must not be labelled as train loss.
plt.plot(history.history['val_loss'], 'r-', label = 'dev loss')
plt.title('train loss VS dev loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()

plt.show()


In [None]:
# Check how well the model is doing on the test set.
# NOTE(review): the model is compiled with a metric, so this presumably holds
# [loss, accuracy] rather than the loss alone - confirm before using it.
test_loss = model.evaluate(x=test_images, y=test_labels)

In [None]:
# Classify one randomly chosen test image and show it with the predicted and
# the true class in the title.
index = np.random.randint(test_images.shape[0])

img = test_images[index][np.newaxis, ...]  # add a leading batch axis of size 1
predictions = model.predict(img)
print('Prediction probabilities: ',predictions)
pred_img = predictions[0].argmax()  # index of the highest probability
pred_label = class_names[pred_img]
true_label = class_names[test_labels[index]]

title = 'Pred: {} VS True: {}'.format(pred_label, true_label)

fig, ax = plt.subplots()
ax.imshow(test_images[index])
ax.set_title(title)
plt.show()

In [None]:
# Observing which kinds of pictures the model has a problem with
def print_mislabelled_images(class_names, test_images, test_labels, pred_labels):
    """Plot up to 36 misclassified test images in a 6x6 grid.

    Parameters
    ----------
    class_names : sequence of str
        Class names, indexed by integer label.
    test_images : array-like
        Images, indexed along the first axis by sample.
    test_labels : array-like of int
        True label of every image.
    pred_labels : array-like of int
        Predicted label of every image.

    Each plotted image is captioned with the class the model predicted for it.
    Fewer than 36 images are drawn when fewer were misclassified.
    """
    test_images = np.asarray(test_images)
    test_labels = np.asarray(test_labels)
    pred_labels = np.asarray(pred_labels)

    # Positions where the prediction disagrees with the ground truth.
    mislabelled_indices = np.where(test_labels != pred_labels)[0]
    mislabelled_images = test_images[mislabelled_indices]
    mislabelled_labels = pred_labels[mislabelled_indices]

    # Never index past the available mistakes: a fixed range(36) raises an
    # IndexError when the model makes fewer than 36 errors.
    n_shown = min(36, len(mislabelled_indices))

    fig = plt.figure(figsize=(10,10))
    fig.suptitle('Examples of mislabelled images by the model: ', fontsize=16)
    for i in range(n_shown):
        plt.subplot(6,6,i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(mislabelled_images[i], cmap=plt.cm.binary)
        plt.xlabel(class_names[mislabelled_labels[i]])

    plt.show()

In [None]:
# Predict the whole test set, keep the most probable class for each image,
# and display examples the model got wrong.
predictions = model.predict(test_images)
pred_labels = predictions.argmax(axis=1)
print_mislabelled_images(class_names, test_images, test_labels, pred_labels)

In [None]:
# Confusion matrix of the test-set predictions (rows: true class, columns:
# predicted class).
# Applied before creating the axes so the settings are in place when the plot
# elements are created.
sn.set(font_scale = 1.4)
# Passing labels explicitly keeps the matrix at one row/column per entry of
# class_names, so it always lines up with the tick labels.
CM = confusion_matrix(test_labels, pred_labels, labels=list(range(len(class_names))))
ax = plt.axes()
# annot=True writes the counts into the cells; with annot=False the annot_kws
# argument had no effect and the heatmap showed colours only.
sn.heatmap(CM, annot=True, fmt='d', annot_kws={'size': 16}, xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()