<a href="https://colab.research.google.com/github/HuyenNguyenHelen/INFO-5505---Machine-learning/blob/main/HuyenNguyen_Assignment_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 6: Image Classification with Convolutional Neural Network
Dataset: [Intel Image Classification dataset](https://www.kaggle.com/puneet6060/intel-image-classification/data#)

Goal: Classify images into 6 categories: building, forest, glacier, mountain, sea, and street.

In [3]:
# Import essential libraries
import tensorflow as tf
import timeit
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob as gb 
import matplotlib.pyplot as plt
import cv2
import os
import seaborn as sns



In [4]:
# Checking the GPU installed
# %tensorflow_version 2.x
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


# Loading data from Kaggle

In [5]:
# Install Kaggle library
!pip install -q kaggle

# Loading kaggle API key file
!cd ~/.kaggle
!cp /content/kaggle.json ~/.kaggle/kaggle.json

In [6]:
# Downloading the dataset from Kaggle
!kaggle datasets download -d puneet6060/intel-image-classification

# Unzipping the dataset
!unzip intel-image-classification.zip


intel-image-classification.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  intel-image-classification.zip
replace seg_pred/seg_pred/10004.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace seg_pred/seg_pred/10005.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

# Exploring Data

In [5]:
# Exploring sizes of the training, testing and prediction sets

train_path = '/content/seg_train/seg_train'
test_path = '/content/seg_test/seg_test'
pred_path = '/content/seg_pred/seg_pred'

def exploring_size (path):
  size =0
  try:
    for name in os.listdir(path):
      size += len(os.listdir(path + '/' + name))
  except:
    size += len(os.listdir(path))
  return size

print('The number of images in the training set: ', exploring_size(train_path))
print('The number of images in the test set: ', exploring_size(test_path))
print('The number of images in the prediction set: ', exploring_size(pred_path))

The number of images in the training set:  14034
The number of images in the test set:  3000
The number of images in the prediction set:  7301


In [6]:
# Exploring data's labels 
folder_names = os.listdir(train_path)
print('All labels: ', folder_names)

# Exploring data distribution in the training set
print ('-'*30, '\nData distributed in the training set:')
for name in folder_names:
  print (name, ': ', len(os.listdir(train_path + '/' + name)))

# Exploring data distributed in the test set
print ('-'*30, '\nData distributed in the test set:')
for name in folder_names:
  print (name, ': ', len(os.listdir(test_path + '/' + name)))

All labels:  ['street', 'mountain', 'buildings', 'sea', 'glacier', 'forest']
------------------------------ 
Data distributed in the training set:
street :  2382
mountain :  2512
buildings :  2191
sea :  2274
glacier :  2404
forest :  2271
------------------------------ 
Data distributed in the test set:
street :  501
mountain :  525
buildings :  437
sea :  510
glacier :  553
forest :  474


In [7]:
# Adding a distribution graph

In [8]:
# Exploring the images' sizes
def explore_image_size (path):
  size = []
  for name in os.listdir(path):
    file_paths = gb.glob(pathname = path+ '/'+ name+'/*.jpg')
    for p in file_paths:
      size.append(plt.imread(p).shape)
  return pd.DataFrame(size, columns = ['H', 'W', 'D']).value_counts()
  
print('Image sizes in the training set\n',  explore_image_size (train_path))
print('\nImage sizes in the test set\n',  explore_image_size (test_path))


Image sizes in the training set
 H    W    D
150  150  3    13986
113  150  3        7
135  150  3        3
111  150  3        3
144  150  3        2
143  150  3        2
142  150  3        2
146  150  3        2
136  150  3        2
134  150  3        2
108  150  3        2
123  150  3        2
97   150  3        1
100  150  3        1
81   150  3        1
103  150  3        1
105  150  3        1
110  150  3        1
102  150  3        1
124  150  3        1
115  150  3        1
119  150  3        1
120  150  3        1
149  150  3        1
131  150  3        1
133  150  3        1
140  150  3        1
141  150  3        1
145  150  3        1
147  150  3        1
76   150  3        1
dtype: int64

Image sizes in the test set
 H    W    D
150  150  3    2993
149  150  3       1
141  150  3       1
131  150  3       1
110  150  3       1
81   150  3       1
76   150  3       1
72   150  3       1
dtype: int64


We can see  that most images have the size 150x150x3. It's reasonable to resize all images to this size. However, we want to reduce all images into a size of 64x64x3 to reduce training time. 

# Data Preprocessing
For data processing, we would like to rescale our images so that every image's pixel value ranges between 0 and 1.
Futher, we should apply data augmentation since our dataset is not large. Data Augmentation is a wise way to inc....for the training set that can help the model avoid overfitting. Some data augmentations we could use are rotating images with some random angles. 

In [9]:
# Encoding data labels
encoded_labels = dict()
for v, k in enumerate(folder_names):
  encoded_labels[k]=v  
print('labels encoded:', encoded_labels)


labels encoded: {'street': 0, 'mountain': 1, 'buildings': 2, 'sea': 3, 'glacier': 4, 'forest': 5}


In [10]:

# Resizing images
def resize_image_xy (path):
  X, y = [],[]
  for name in os.listdir(path):
    file_paths = gb.glob(pathname = path+ '/'+ name+'/*.jpg')
    for p in file_paths:
      image = plt.imread(p)
      resized_image = cv2.resize(image,(64,64) )
      X.append(resized_image)
      y.append(encoded_labels[name])
  return X, y

def resize_image_x (path):
  X = []
  for name in os.listdir(path):
    path_file = path+'/'+ name
    image = plt.imread(path_file)
    resized_image = cv2.resize(image,(64,64) )
    X.append(resized_image)
  return X


# For images in the training set
X_train, y_train = resize_image_xy (train_path)
print('The number of images in the training set:', len(X_train), len(y_train))

# For images in the test set
X_test, y_test = resize_image_xy (test_path)
print('The number of images in the testing set:', len(X_test), len(y_test))

#For images in the predicting set
X_pred = resize_image_x (pred_path)
print('The number of images in the predicting set:', len(X_pred))


The number of images in the training set: 14034 14034
The number of images in the testing set: 3000 3000
The number of images in the predicting set: 7301


In [11]:
import sklearn
from sklearn.utils import shuffle
X_train, y_train = sklearn.utils.shuffle(np.array(X_train), np.array(y_train), random_state = 42)
X_test, y_test = sklearn.utils.shuffle(np.array(X_test), np.array(y_test))
X_pred = np.array(X_pred)
print('The shape of X_train:', X_train.shape)
print('The shape of y_train:', y_train.shape)
print('The shape of X_test:', X_test.shape)
print('The shape of y_test:', y_test.shape)
print('The shape of X_pred:', X_pred.shape)

The shape of X_train: (14034, 64, 64, 3)
The shape of y_train: (14034,)
The shape of X_test: (3000, 64, 64, 3)
The shape of y_test: (3000,)
The shape of X_pred: (7301, 64, 64, 3)


In [12]:
'''
# Reformat datatypes of the input and target data
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)
X_pred = np.array(X_pred)
print('The shape of X_train:', X_train.shape)
print('The shape of y_train:', y_train.shape)
print('The shape of X_test:', X_test.shape)
print('The shape of y_test:', y_test.shape)
print('The shape of X_pred:', X_pred.shape)
'''

"\n# Reformat datatypes of the input and target data\nX_train = np.array(X_train)\ny_train = np.array(y_train)\nX_test = np.array(X_test)\ny_test = np.array(y_test)\nX_pred = np.array(X_pred)\nprint('The shape of X_train:', X_train.shape)\nprint('The shape of y_train:', y_train.shape)\nprint('The shape of X_test:', X_test.shape)\nprint('The shape of y_test:', y_test.shape)\nprint('The shape of X_pred:', X_pred.shape)\n"

In [13]:
'''
from keras.preprocessing.image import ImageDataGenerator
# Preprocessing the training set
# Creating a data augmentation model
train_generator = ImageDataGenerator(rescale = 1./255,    
                                      shear_range = 0.2,
                                      rotation_range=20,
                                      zoom_range = 0.2,
                                      horizontal_flip = True)
train_generated = train_generator.flow(X_train, y_train, batch_size=32)

#train_generated = train_generator.flow_from_directory(train_path,
                                                      target_size = (64, 64),
                                                      batch_size = 32,
                                                      class_mode = 'categorical')

# Preprocessing the testing set
test_generator = ImageDataGenerator(rescale = 1./255)
test_generated = test_generator.flow(X_test, y_test, batch_size=32)
#test_generated = test_generator.flow_from_directory(test_path, 
                                                    target_size = (64,64),
                                                    batch_size = 32,
                                                    class_mode = 'categorical')
'''

"\nfrom keras.preprocessing.image import ImageDataGenerator\n# Preprocessing the training set\n# Creating a data augmentation model\ntrain_generator = ImageDataGenerator(rescale = 1./255,    \n                                      shear_range = 0.2,\n                                      rotation_range=20,\n                                      zoom_range = 0.2,\n                                      horizontal_flip = True)\ntrain_generated = train_generator.flow(X_train, y_train, batch_size=32)\n\n#train_generated = train_generator.flow_from_directory(train_path,\n                                                      target_size = (64, 64),\n                                                      batch_size = 32,\n                                                      class_mode = 'categorical')\n\n# Preprocessing the testing set\ntest_generator = ImageDataGenerator(rescale = 1./255)\ntest_generated = test_generator.flow(X_test, y_test, batch_size=32)\n#test_generated = test_generator.fl

# Building a base CNN model

In [14]:
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, MaxPooling2D, Flatten

# Defining a CNN model
def define_cnn():
  # Creating the model
  cnn = Sequential()

  # Convolution 1
  cnn.add(Conv2D(32, (3,3), activation='relu', input_shape = (64,64,3)))
  # Maxpooling 1
  cnn.add(MaxPooling2D(pool_size = (2,2)))

  # Convolution 2
  cnn.add(Conv2D(64, (3,3), activation = 'relu'))
  # Maxpooling 2
  cnn.add(MaxPooling2D(pool_size = (2,2)))

  # Convolution 3
  cnn.add(Conv2D(64, (3,3), activation = 'relu'))
  #  Maxpooling 3
  cnn.add(MaxPooling2D(pool_size = (2,2)))

  # Flatten
  cnn.add(Flatten())
  
  # Fully-connected NN layer
  cnn.add(Dense(32, activation = 'relu'))
  cnn.add(Dense(16, activation = 'relu'))
  cnn.add(Dense(6, activation = 'softmax'))
  return cnn



In [15]:
from keras.preprocessing.image import ImageDataGenerator
# Defining 10-fold cross validation for validating the model in the training set
kfold = KFold(n_splits = 10, shuffle = True) #random_state = 42
fold_accuracy = []
fold_loss = []
fold = 1

for train, test in kfold.split(np.zeros(len(y_train)),y_train):
  # Preprocessing 
  train_generator = ImageDataGenerator(rescale = 1./255,    
                                      shear_range = 0.2,
                                      rotation_range=20,
                                      zoom_range = 0.2,
                                      horizontal_flip = True)
  train_generated = train_generator.flow(X_train[train], y_train[train], batch_size=32)

  test_generator = ImageDataGenerator(rescale = 1./255)
  test_generated = test_generator.flow(X_train[test], y_train[test], batch_size=32)
  
  # Create the cnn model
  cnn2 = define_cnn()
  
  # Compile the model
  cnn2.compile(optimizer = 'adam', 
              loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
              metrics = ['accuracy'])
  print ('-'*35, '\nTraining on fold {}'.format(fold))

  # Fit the model into the training set
  history = cnn2.fit(train_generated,  
                    epochs = 5,
                    validation_data=(test_generated) )
  
  # Create validation metrics
  scores = cnn2.evaluate(test_generated, verbose=1)

  print(f"Score of fold: {fold}: {cnn2.metrics_names[0]} is {scores[0]}, {cnn2.metrics_names[1]} is {scores[1]}")
  fold_loss.append(scores[0])
  fold_accuracy.append(scores[1])
  fold += 1

# Printing the average scores of 10 folders to get the generalized scores of the model
print('-'*35)
print('The generalized scores of the model:')
print('Accuracy: ', np.mean(fold_accuracy))
print('Loss: ', np.mean(fold_loss))


----------------------------------- 
Training on fold 1
Epoch 1/5
Epoch 2/5
Epoch 3/5

KeyboardInterrupt: ignored

In [None]:
'''
# Defining 10-fold cross validation for validating the model in the training set
kfold = KFold(n_splits = 10, shuffle = True) #random_state = 42
fold_accuracy, fold_loss = [],[]
fold = 1
for train, test in kfold.split(X_train, y_train):
  # Create the cnn model
  cnn = define_cnn()
  
  # Compile the model
  cnn.compile(optimizer = 'adam', 
              loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
              metrics = ['accuracy'])
  print ('-'*35, '\nTraining on fold {}'.format(fold))

  # Fit the model into the training set
  history = cnn.fit(X_train[train], 
                    y_train[train], 
                    epochs = 10,
                    validation_data=(X_train[test], y_train[test]) )
  
  # Create validation metrics
  scores = cnn.evaluate(X_train[test], y_train[test], verbose=1)
  print(f"Score of fold: {fold}: {cnn.metrics_names[0]} is {scores[0]}, {cnn.metrics_names[1]} is {scores[1]}")
  fold_loss.append(scores[0])
  fold_accuracy.append(scores[1])
  fold += 1

# Printing the average scores of 10 folders to get the generalized scores of the model
print('-'*35)
print('The generalized scores of the model:')
print('Accuracy: ', np.mean(fold_accuracy))
print('Loss: ', np.mean(fold_loss))
'''

In [None]:
# Fitting the model in the whole training set

train_generated = train_generator.flow(X_train, y_train, batch_size=32)
test_generated = test_generator.flow(X_test, y_test, batch_size=32)

cnn2.fit(train_generated, epochs = 5,  verbose = 1)
model_loss, model_accuracy = cnn2.evaluate(test_generated)
print(cnn2.summary())
print(model_loss, model_accuracy)

In [None]:
'''
# Fitting the model in the whole training set
cnn.fit(X_train, y_train, epochs = 10,  verbose = 1)
model_loss, model_accuracy = cnn.evaluate(X_test, y_test)
print(cnn.summary())
print(model_loss, model_accuracy)
'''

In [None]:
# Applying the model to predict on the test set
y_test_pred = np.argmax(cnn2.predict(test_generated), axis=-1) 
y_test_pred

In [None]:
# Evaluating the model with confusion matrix
from sklearn.metrics import confusion_matrix
conf_matrix = confusion_matrix(y_test, y_test_pred)

## Visualizing the confusion matrix
plt.figure(figsize=(10,10))
labels = [label for label in encoded_labels.keys() ]
sns.heatmap(conf_matrix, annot = True, fmt=".0f" )
plt.xticks(np.arange(6), labels, rotation = 60, ha='right' )
plt.yticks(np.arange(6), labels)
plt.xlabel ('predicted values')
plt.ylabel ('actual values')
plt.show()

In [None]:
# Plotting the performance of the model over training
plt.plot(history.history['accuracy'], label = 'train_accuracy')
plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy in training and validation over epoches')
plt.ylim([0.4, 1])
plt.legend(loc = 'lower right')
plt.show()

In [None]:
# Applied the model to predict the unseen data
pred_generator = ImageDataGenerator(rescale = 1./255)
pred_generated = pred_generator.flow(X_pred, batch_size=32)
y_pred = cnn2.predict(pred_generated, batch_size=32)

In [None]:
# Defining the function for predicting the unseen data
def pred_image(path):
    image=Image.open(path)
    image=image.resize((64,64))
    x=np.array(image)
    x=np.expand_dims(x,axis=0)
    classs=model.predict_classes(x)
    l=os.listdir('../input/intel-image-classification/seg_train/seg_train')
    l.sort()
    return l[classs[0]]