# Convolutional neural network image classification. Models used: a simple sequential-layer model, a model fine-tuned on features extracted with VGG16 (ImageNet weights), and finally an n-estimator ensemble of neural networks.
"""Imageclassification.ipynb
Automatically generated by Colaboratory.
Original file is located at https://colab.research.google.com/drive/1i0XH48POWTVDgKPos8GGGcfDlsEJrFdq
Download the datasets from https://www.kaggle.com/puneet6060/intel-image-classification or https://datahack.analyticsvidhya.com. Image classification code: SUDEV PRADHAN (18149), sudev18@iiserbpr.ac.in
"""
# Image classification using CNN (simple model, VGG ImageNet and ensemble neural network) # Code: SUDEV PRADHAN (18149), 4th May 2021 # sudev18@iiserbpr.ac.in
import numpy as np
import os
from sklearn.metrics import confusion_matrix
import seaborn as sn; sn.set(font_scale=1.4)
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from tqdm import tqdm
"""# Loading Data sets"""
# The six scene categories; a category's position in this list is its integer label.
class_names = ['mountain', 'street', 'glacier', 'buildings', 'sea', 'forest']
# Maps a category (folder) name to its integer label.
class_names_label = {class_name: i for i, class_name in enumerate(class_names)}

nb_classes = len(class_names)
# Size (in pixels) every image is resized to on load.
IMAGE_SIZE = (150, 150)


def load_data(datasets=None):
    """Load the images and labels of each dataset directory.

    Every dataset directory is scanned for sub-folders whose name is a key
    of ``class_names_label``; each readable image inside is converted from
    BGR to RGB and resized to ``IMAGE_SIZE``.

    Args:
        datasets: Optional iterable of dataset directories. When omitted,
            the ``seg_train`` and ``seg_test`` folders on the mounted drive
            are used (in that order).

    Returns:
        A list with one ``(images, labels)`` tuple per dataset, in the
        order given. ``images`` is a float32 array and ``labels`` an int32
        array.
    """
    if datasets is None:
        datasets = [
            '../content/drive/My Drive/Colab Notebooks/image classification test train/seg_train',
            '../content/drive/My Drive/Colab Notebooks/image classification test train/seg_test',
        ]

    output = []

    # Iterate through training and test sets
    for dataset in datasets:
        images = []
        labels = []

        print("Loading {}".format(dataset))

        # Iterate through each folder corresponding to a category
        for folder in os.listdir(dataset):
            # Stray entries that are not a known category would otherwise
            # abort the whole load with a KeyError.
            if folder not in class_names_label:
                print("Skipping unknown entry {}".format(folder))
                continue
            label = class_names_label[folder]
            folder_path = os.path.join(dataset, folder)

            # Iterate through each image in our folder
            for file in tqdm(os.listdir(folder_path)):
                # Get the path name of the image
                img_path = os.path.join(folder_path, file)

                # Open and resize the img
                image = cv2.imread(img_path)
                if image is None:
                    # cv2.imread yields None for files it cannot decode;
                    # passing that to cvtColor would raise.
                    continue
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, IMAGE_SIZE)

                # Append the image and its corresponding label to the output
                images.append(image)
                labels.append(label)

        images = np.array(images, dtype='float32')
        labels = np.array(labels, dtype='int32')

        output.append((images, labels))

    return output
# Load both splits, then shuffle images and labels of the training split
# together (fixed random_state so the order is reproducible).
(train_images, train_labels), (test_images, test_labels) = load_data()
train_images, train_labels = shuffle(train_images, train_labels, random_state=25)
"""Visualising Datasets"""
# Number of examples in each split.
n_train = train_labels.shape[0]
n_test = test_labels.shape[0]

print("Number of training examples: {}".format(n_train))
print("Number of testing examples: {}".format(n_test))
print("Each image is of size: {}".format(IMAGE_SIZE))

import pandas as pd

# Newer Matplotlib releases renamed the bundled 'seaborn' style to
# 'seaborn-v0_8'; fall back to the new name when the old one is gone.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

# Per-class example counts for both splits, shown as a grouped bar chart.
_, train_counts = np.unique(train_labels, return_counts=True)
_, test_counts = np.unique(test_labels, return_counts=True)
pd.DataFrame({'train': train_counts, 'test': test_counts}, index=class_names).plot.bar()
plt.show()

# Share of each category in the training split.
plt.style.use('fivethirtyeight')
plt.pie(train_counts, explode=(0, 0, 0, 0, 0, 0), labels=class_names, autopct='%1.1f%%')
plt.axis('equal')
plt.title('Percentage of each observed category')
plt.show()

# Scale pixel values by 1/255.
train_images = train_images / 255.0
test_images = test_images / 255.0
"""Visualisation
"""
def display_random_image(class_names, images, labels):
    """Show one randomly picked image, titled with its index and class name.

    Args:
        class_names: Sequence mapping an integer label to its class name.
        images: Array of images; one is drawn uniformly at random.
        labels: Integer labels aligned with ``images``.
    """
    index = np.random.randint(images.shape[0])
    caption = 'Image #{} : '.format(index) + class_names[labels[index]]

    plt.figure()
    plt.imshow(images[index])
    plt.title(caption)
    # Plain picture: no grid and no axis ticks.
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.show()
# Sanity check: look at one random training image with its label.
display_random_image(class_names=class_names, images=train_images, labels=train_labels)
plt.style.use('ggplot')


def display_examples(class_names, images, labels):
    """Display up to the first nine images in a 3x3 grid with their labels.

    Args:
        class_names: Sequence mapping an integer label to its class name.
        images: Array of images; only the first nine (at most) are shown.
        labels: Integer labels aligned with ``images``.
    """
    # Cap at the number of available images so a short input (fewer than
    # nine examples) does not raise an IndexError.
    n_examples = min(9, len(images))

    plt.figure(figsize=(10, 10))
    for i in range(n_examples):
        plt.subplot(3, 3, i + 1)
        plt.imshow(images[i], cmap=plt.cm.binary)
        plt.xlabel(class_names[labels[i]])
        plt.xticks([])
        plt.yticks([])
    plt.tight_layout()
    plt.show()
# Preview the first few training images with their labels.
display_examples(class_names=class_names, images=train_images, labels=train_labels)
"""# Model 1- Simple Model Creation """
# Model 1: two convolution + max-pooling stages, then a dense head that
# ends in a 6-way softmax.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(tf.keras.layers.MaxPooling2D(2, 2))
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(6, activation=tf.nn.softmax))

# Labels are plain integers, hence the sparse categorical loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()
"""epoch model 1"""
# Train model 1, holding out 20% of the training data for validation.
history = model.fit(train_images, train_labels, batch_size=128, epochs=20, validation_split=0.2)

# Plot loss and accuracy on separate figures. The second curve in each
# comes from the validation split, not from the test set.
plt.style.use('fivethirtyeight')
for metric in ('loss', 'accuracy'):
    plt.figure()
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.ylabel(metric)
    plt.xlabel('epochs')
    plt.legend(['train', 'validation'])
    plt.show()
"""accuracy"""
# Evaluate model 1 on the held-out test set.
# NOTE(review): the model is compiled with metrics=['accuracy'], so this
# presumably holds loss and accuracy rather than the loss alone - confirm.
test_loss = model.evaluate(x=test_images, y=test_labels)
"""# Experiment on Random images
"""
predictions = model.predict(test_images)  # Vector of probabilities
pred_labels = np.argmax(predictions, axis=1)  # We take the highest probability

# Show a random test image captioned with its predicted label.
display_random_image(class_names, test_images, pred_labels)
"""confusion matrix"""
from sklearn.metrics import classification_report, confusion_matrix

# Confusion matrix of model 1 on the test set, drawn as an annotated heatmap.
CM = confusion_matrix(test_labels, pred_labels)
ax = plt.axes()
# fmt='d' prints the integer counts as-is instead of in scientific notation.
sn.heatmap(CM, annot=True, fmt='d', annot_kws={"size": 10},
           xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_title('Confusion matrix')
plt.show()

# Per-class precision/recall/F1, with rows named after the categories.
print(classification_report(test_labels, pred_labels,
                            labels=list(range(len(class_names))),
                            target_names=class_names))
"""error
"""
def print_mislabeled_images(class_names, test_images, test_labels, pred_labels):
    """Show examples of test images whose predicted label is wrong.

    The misclassified images are passed to ``display_examples`` together
    with their (incorrect) predicted labels.

    Args:
        class_names: Sequence mapping an integer label to its class name.
        test_images: Array of test images.
        test_labels: True integer labels aligned with ``test_images``.
        pred_labels: Predicted integer labels aligned with ``test_images``.
    """
    # Positions where the prediction disagrees with the ground truth.
    mislabeled_indices = np.where(test_labels != pred_labels)
    mislabeled_images = test_images[mislabeled_indices]
    mislabeled_labels = pred_labels[mislabeled_indices]

    title = "Some examples of mislabeled images by the classifier:"
    # The title was previously built but never shown to the user.
    print(title)
    display_examples(class_names, mislabeled_images, mislabeled_labels)
# Inspect some of the test images model 1 got wrong.
print_mislabeled_images(
    class_names=class_names,
    test_images=test_images,
    test_labels=test_labels,
    pred_labels=pred_labels,
)
"""# Model 2- Extraction with VGG ImageNet"""
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

# VGG16 with ImageNet weights and without its classification head
# (include_top=False), used here only to produce features via predict().
model = VGG16(weights='imagenet', include_top=False)

# NOTE(review): the images were divided by 255 earlier and are fed in as-is;
# preprocess_input is imported but never applied - confirm whether the
# VGG16-specific preprocessing is intended here.
train_features = model.predict(train_images)
test_features = model.predict(test_images)
"""PCA"""
from sklearn import decomposition

# Feature maps are 4-D: (number of samples, x, y, z).
n_train, x, y, z = train_features.shape
n_test, x, y, z = test_features.shape
numFeatures = x * y * z

pca = decomposition.PCA(n_components=2)

# Flatten each sample's feature map into one row before fitting the PCA.
# (The reshape previously referenced an undefined name instead of the
# flattened feature count.)
X = train_features.reshape((n_train, numFeatures))
pca.fit(X)

# The two principal components of every training sample.
C = pca.transform(X)
C1 = C[:, 0]
C2 = C[:, 1]

# Scatter at most 1000 points per class in the 2-D PCA space.
plt.subplots(figsize=(10, 10))
for i, class_name in enumerate(class_names):
    plt.scatter(C1[train_labels == i][:1000], C2[train_labels == i][:1000],
                label=class_name, alpha=0.4)
plt.legend()
plt.title("PCA Projection")
plt.show()
"""Training on VGG
"""
# Model 2: a small dense classifier trained on the extracted VGG16 features.
model2 = tf.keras.Sequential()
model2.add(tf.keras.layers.Flatten(input_shape=(x, y, z)))
model2.add(tf.keras.layers.Dense(50, activation=tf.nn.relu))
model2.add(tf.keras.layers.Dense(6, activation=tf.nn.softmax))

model2.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# 20% of the training features are held out for validation.
history2 = model2.fit(
    train_features,
    train_labels,
    batch_size=128,
    epochs=15,
    validation_split=0.2,
)

model2.summary()
"""epoch"""
# Learning curves of model 2, one figure per metric. The second curve in
# each comes from the validation split, not from the test set.
plt.style.use('fivethirtyeight')
for metric in ('loss', 'accuracy'):
    plt.figure()
    plt.plot(history2.history[metric])
    plt.plot(history2.history['val_' + metric])
    plt.ylabel(metric)
    plt.xlabel('epochs')
    plt.legend(['train', 'validation'])
    plt.show()
"""accuracy"""
# Evaluate model 2 on the features of the held-out test set.
# NOTE(review): model2 is compiled with metrics=['accuracy'], so this
# presumably holds loss and accuracy rather than the loss alone - confirm.
test_loss = model2.evaluate(x=test_features, y=test_labels)
"""Confusion matrix model 2"""
from sklearn.metrics import classification_report, confusion_matrix

predictions = model2.predict(test_features)  # Vector of probabilities
pred_labels = np.argmax(predictions, axis=1)  # We take the highest probability

# Confusion matrix of model 2 on the test set, drawn as an annotated heatmap.
CM = confusion_matrix(test_labels, pred_labels)
ax = plt.axes()
# fmt='d' prints the integer counts as-is instead of in scientific notation.
sn.heatmap(CM, annot=True, fmt='d', annot_kws={"size": 10},
           xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_title('Confusion matrix')
plt.show()

# Per-class precision/recall/F1, with rows named after the categories.
print(classification_report(test_labels, pred_labels,
                            labels=list(range(len(class_names))),
                            target_names=class_names))
"""# Model 3- Ensemble Neural Networks"""
# Fix NumPy's global seed so the random draws below are repeatable.
np.random.seed(seed=1997)

# Number of estimators
n_estimators = 10

# Proportion of samples to use to train each estimator, converted to an
# absolute number of training samples.
max_samples = 0.8
max_samples *= n_train
max_samples = int(max_samples)

models = list()
# One random integer in [50, 100) per estimator.
# NOTE: this array is deliberately still called `random` because later code
# indexes it by that name.
random = np.random.randint(50, 100, size=n_estimators)
# Build the ensemble: every member is a one-hidden-layer classifier whose
# hidden width comes from the pre-drawn `random` array.
for i in range(n_estimators):
    hidden_units = random[i]

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=(x, y, z)))
    # One layer with random size
    model.add(tf.keras.layers.Dense(hidden_units, activation=tf.nn.relu))
    model.add(tf.keras.layers.Dense(6, activation=tf.nn.softmax))

    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Store model
    models.append(model)

# NOTE(review): the original indentation was lost; the summary is taken to
# run once after the loop (i.e. for the last model built) - confirm.
model.summary()
"""epoch"""
# Training history of every ensemble member, in model order.
histories = []

for i in range(n_estimators):
    # Train each model on a bag of the training data
    # (indices are drawn with np.random.choice's default settings).
    train_idx = np.random.choice(len(train_features), size=max_samples)
    histories.append(
        models[i].fit(train_features[train_idx], train_labels[train_idx],
                      batch_size=128, epochs=10, validation_split=0.1)
    )
"""accuracy """
from sklearn.metrics import accuracy_score

# Class probabilities predicted by every ensemble member for the test set.
predictions = [models[i].predict(test_features) for i in range(n_estimators)]

# Sum the probabilities over the members and pick the class with the
# highest total for each sample.
predictions = np.array(predictions)
predictions = predictions.sum(axis=0)
pred_labels = predictions.argmax(axis=1)

print("Accuracy : {}".format(accuracy_score(test_labels, pred_labels)))
# Learning curves of one ensemble member (index 1), one figure per metric.
# The second curve in each comes from the validation split, not the test set.
plt.style.use('fivethirtyeight')
for metric in ('accuracy', 'loss'):
    plt.figure()
    plt.plot(histories[1].history[metric])
    plt.plot(histories[1].history['val_' + metric])
    plt.ylabel(metric)
    plt.xlabel('epochs')
    plt.legend(['train', 'validation'])
    plt.show()
from sklearn.metrics import classification_report, confusion_matrix

# Per-class precision/recall/F1 of the ensemble, with rows named after the
# categories.
print(classification_report(test_labels, pred_labels,
                            labels=list(range(len(class_names))),
                            target_names=class_names))