<a href="https://colab.research.google.com/github/MateusFerroAntunesdeOliveira/Feature_Extraction_CNN_InceptionV3/blob/main/Feature_Extraction_CNN_InceptionV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; every dataset and feature path used
# in this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# ------ IMPORTS ------

import matplotlib.pyplot as plt
import matplotlib as pl
import matplotlib
import seaborn as sns
import itertools
import cv2
import pandas as pd
import numpy as np
import os

from skimage import feature
from skimage.feature import hog

from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix

# Monolithic Classifiers 
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

# Ensembles
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier


In [None]:

# Pretrained InceptionV3 used purely as a feature extractor: the
# classification head is removed (include_top=False) and pooling='avg'
# is requested so that predict() returns one flat vector per image.
model = InceptionV3(include_top = False, weights = 'imagenet', pooling = 'avg', input_tensor=Input(shape=(299,299,3)))

# General path
path = '/content/drive/MyDrive/ML_TDE_02/Base/'

# List of classes (one sub-folder of `path` per class). The position of a
# name in this list is the integer label assigned during feature extraction.
class_names = ['humanos', 'praia', 'obras', 'onibus', 'dino', 'elefante', 'flores', 'cavalos', 'montanhas', 'comida']

# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= List of paths =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# One list of file names per class, in the same order as class_names.
# os.listdir returns entries in arbitrary order, so each listing is sorted
# to make the image order (and everything derived from it) reproducible.
file_list = [
    sorted(os.listdir(os.path.join(path, class_name)))
    for class_name in class_names
]

# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Feature extraction =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Upper bound on the number of images read from each class folder.
MAX_IMAGES_PER_CLASS = 100

# LBP neighbourhood: number of sampling points (P) and radius.
# With method="uniform" the LBP codes are the integers 0..P+1, so the
# histogram needs P + 2 bins; fewer bins silently discard the last codes.
# NOTE: this makes the handcrafted vector 2 values longer than with the
# previous 59-bin histogram.
LBP_POINTS = 59
LBP_RADIUS = 1
LBP_BINS = LBP_POINTS + 2

X = []       # handcrafted feature vectors, one row per image
X_deep = []  # InceptionV3 feature vectors, one row per image
y = []       # integer class label of each row (index into class_names)

for classe, classes_files in enumerate(file_list):
    # Slicing instead of indexing 0..99 tolerates folders with fewer images.
    for file_name in classes_files[:MAX_IMAGES_PER_CLASS]:
        name = f"{path}{class_names[classe]}/{file_name}"
        print(name)
        imagem = cv2.imread(name)
        if imagem is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None instead of raising; skip it so the run does not abort.
            print('Skipping unreadable image: ' + name)
            continue

        print(imagem.shape)
        altura, largura, _ = imagem.shape
        n_pixels = altura * largura

        # -------------- Convert the image to RGB and Gray --------------
        # OpenCV loads images in BGR channel order.
        cinza = cv2.cvtColor(imagem, cv2.COLOR_BGR2GRAY)
        rgb = cv2.cvtColor(imagem, cv2.COLOR_BGR2RGB)

        # -------------- Color Histograms --------------
        # 256-bin histogram per channel, divided by the pixel count so that
        # images of different sizes are comparable.
        r_histograma, g_histograma, b_histograma = (
            cv2.calcHist([rgb], [channel], None, [256], [0, 256]) / n_pixels
            for channel in range(3)
        )

        # -------------- Local Binary Pattern (LBP) --------------
        lbp = feature.local_binary_pattern(cinza, LBP_POINTS, LBP_RADIUS, method = "uniform")
        lbp_histograma, _ = np.histogram(lbp.ravel(), bins=LBP_BINS, range=(0, LBP_BINS))
        lbp_histograma = lbp_histograma.astype("float")
        lbp_histograma /= lbp_histograma.sum()

        # -------------- Hog (Histogram of Oriented Gradients) --------------
        hg = hog(cinza, orientations=8, pixels_per_cell=(32, 32), cells_per_block=(8, 8), block_norm='L2-Hys')

        # -------------- Concatenate the handcrafted feature sets --------------
        X_image = np.concatenate([
            np.ravel(part)
            for part in (lbp_histograma, hg, r_histograma, g_histograma, b_histograma)
        ]).tolist()

        # -------------- Extract deep features using InceptionV3 pretrained model --------------
        # Feed the RGB image: the ImageNet weights expect RGB channel order,
        # whereas the array returned by cv2.imread is BGR.
        img = cv2.resize(rgb, (299, 299))
        xd = image.img_to_array(img)
        xd = np.expand_dims(xd, axis=0)  # add the batch dimension
        xd = preprocess_input(xd)
        deep_features = model.predict(xd)
        print(deep_features.shape)
        deep_features = np.ravel(deep_features).astype("float64").tolist()

        # Append to the three lists together, only after every extraction
        # step succeeded, so rows and labels can never get out of step.
        y.append(classe)
        X.append(X_image)
        X_deep.append(deep_features)
            

# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Saving the files/folders =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

# Write the feature tables as CSV files without header row or index column,
# into the current working directory.
# NOTE(review): the loading cell reads y.csv and X_deep.csv from
# /content/drive/MyDrive/ML_TDE_02/Features/ - the files written here
# presumably have to be copied there first; confirm.

# Handcrafted features first, then the deep features; `df` ends up holding
# the deep-feature table.
for rows, csv_name in ((X, 'X.csv'), (X_deep, 'X_deep.csv')):
    df = pd.DataFrame(rows)
    df.to_csv(csv_name, header = False, index = False)

# Class labels
df_class = pd.DataFrame(y)
df_class.to_csv('y.csv', header = False, index = False)


In [None]:
# Labels: read the header-less CSV and flatten it into a 1-D array
labels_frame = pd.read_csv('/content/drive/MyDrive/ML_TDE_02/Features/y.csv', header=None)
y = labels_frame.to_numpy().ravel()
print(y.shape)

# Deep features: one row per image, kept as a 2-D array
features_frame = pd.read_csv('/content/drive/MyDrive/ML_TDE_02/Features/X_deep.csv', header=None)
X = features_frame.to_numpy()
print(X.shape)


In [None]:
# ------------ Plot graphic ------------

matplotlib.rc('figure', figsize = (11, 11))

# Plotting confusion matrix
def plot_confusion_matrix(ax, cm, title = None, labels = None):
    """Draw a confusion matrix as an annotated heatmap on the given axes.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on. It is passed to seaborn explicitly so the heatmap
        lands on this axes rather than on whichever one is current.
    cm : square array-like
        Confusion matrix (rows = true labels, columns = predicted labels).
    title : str, optional
        Suffix appended to the 'Confusion Matrix --> ' title. When omitted
        the title is just 'Confusion Matrix'.
    labels : sequence, optional
        Tick labels for both axes. Defaults to the positional indices
        '0', '1', ... of the rows of `cm`.

    Returns
    -------
    The same `ax`, to allow chaining.
    """
    if labels is None:
        labels = [str(i) for i in range(len(cm))]
    labels = list(labels)

    df_cm = pd.DataFrame(cm, index = labels, columns = labels)
    sns.heatmap(df_cm, annot = True, ax = ax)

    # Concatenating None to a str would raise TypeError, so handle it here.
    if title is None:
        ax.set_title('Confusion Matrix')
    else:
        ax.set_title('Confusion Matrix --> ' + title)
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')
    return ax
    

In [None]:
# -------- HOLDOUT --------
# 70 % train / 30 % test, fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

# Monolithic Classifiers
knn = KNeighborsClassifier(n_neighbors = 1, p = 3, algorithm = 'brute', weights = 'distance')
# random_state added so the tree (and the ensembles that clone it) give the
# same result on every run, like the other seeded estimators below.
dt =  DecisionTreeClassifier(criterion = 'entropy', max_depth = 10, min_samples_split = 10, random_state = 0)
svm = SVC(probability = True, C = 0.1, kernel = 'linear')
nb = GaussianNB(var_smoothing = 1e-09)
# hidden_layer_sizes is written as a real 1-tuple: "(16)" is just the int 16.
# NOTE: per the scikit-learn docs, early_stopping / validation_fraction only
# take effect with the 'sgd' and 'adam' solvers; with 'lbfgs' they are ignored.
mlp = MLPClassifier(hidden_layer_sizes = (16,), activation = 'relu', max_iter = 5000, solver = 'lbfgs', tol =  1e-10, early_stopping = True, validation_fraction = 0.2, random_state = 0)

# Ensembles (homogeneous)
# The base estimator is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 (old name removed in
# 1.4), while the first positional slot works on both old and new versions.
bg = BaggingClassifier(mlp, n_estimators = 100, random_state = 0)
ada = AdaBoostClassifier(nb, n_estimators = 100, learning_rate = 0.05, random_state = 0)
rf = RandomForestClassifier(n_estimators = 1000, random_state = 0)

# Ensembles (heterogeneous)
# Soft voting averages the class probabilities predicted by each member.
cb = VotingClassifier(estimators = [('DecisionTree', dt), ('NaiveBayes', nb)], voting = 'soft')

# Display names and estimators, kept in the same order.
titles = ['KNN','DecisionTree', 'SVM', 'NaiveBayes', 'MLP', 'Bag', 'Ada', 'RF', 'DT+NB']
methods = [knn, dt, svm, nb, mlp, bg, ada, rf, cb]


In [None]:
# Fit the classifiers and record each one's accuracy on the held-out test set
scores = []
for method, name in zip(methods, titles):
    method.fit(X_train, y_train)
    # Score once and reuse the value: evaluating the test set a second time
    # just for the print doubles the prediction cost for no benefit.
    accuracy = method.score(X_test, y_test)
    scores.append(accuracy)
    print("Classification Accuracy {} = {}".format(name, accuracy))


In [None]:
# Plotting Confusion matrix: one heatmap per fitted classifier on a 5x2 grid

fig3, sub1 = plt.subplots(5, 2, figsize = (15, 15))
plt.subplots_adjust(wspace = 0.4, hspace = 0.4)
axes = sub1.flatten()

for clf, ax, title in zip(methods, axes, titles):
    y_predicted = clf.predict(X_test)
    cm = confusion_matrix(y_test , y_predicted)
    # Tick labels follow the matrix size instead of assuming 10 classes.
    class_labels = [str(i) for i in range(cm.shape[0])]
    df_cm = pd.DataFrame(cm, index = class_labels, columns = class_labels)
    sns.heatmap(df_cm, annot=True, ax=ax)
    ax.set_title('Confusion Matrix --> ' + title)
    ax.set_ylabel('True label')
    ax.set_xlabel('Predicted label')

# The grid can hold more plots than there are classifiers; hide the leftovers
# so no empty frame is drawn.
for unused_ax in axes[len(methods):]:
    unused_ax.set_visible(False)

# The former plt.tight_layout() call came after plt.show(), where it can no
# longer affect the displayed figure; spacing is set by subplots_adjust above.
plt.show()
