<a href="https://colab.research.google.com/github/jett220201/Usable-and-No-Usable-Waste-Classifier-with-DL/blob/main/Models/VGG19_%26_SVM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Solid waste sorter (PUJ) using VGG19 & SVM

- Juan Esteban Torres Tamayo
- Janner Arley Rosero Mora

Based on:
 https://github.com/bnsreenu/python_for_microscopists/blob/master/158b_transfer_learning_using_CNN_weights_VGG16_RF.py

In [None]:
import os
import cv2
import glob
import random
import sklearn
import numpy as np
import pandas as pd 
import seaborn as sns
import tensorflow as tf
from sklearn import svm
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.metrics import auc
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.metrics import roc_curve
from sklearn.metrics import make_scorer
from keras.applications.vgg19 import VGG19
from keras.models import Model, Sequential
from sklearn.metrics import matthews_corrcoef
from sklearn.model_selection import GridSearchCV
from matplotlib.collections import EventCollection
from sklearn.model_selection import train_test_split

#########################################################
# Wrap the Matthews correlation coefficient (MCC) as a scikit-learn scorer so
# it can be handed to GridSearchCV through its `scoring=` argument.
MCC_scorer = make_scorer(matthews_corrcoef)  # MCC scorer used to validate each candidate
#########################################################


In [None]:
# Side length (in pixels) every image is resized to before feature
# extraction; images become SIZE x SIZE.
SIZE = 224

# Show the class folders found under data/ -- the folder names are later
# used as the labels of the images they contain.
print(os.listdir("data/"))

In [None]:
# Capture the images and their labels into two parallel lists.
# Expected layout: data/<class_name>/<image>.jpg -- the folder name is the label.
images = []
labels = []

# sorted() makes the load order (and therefore a seeded split) reproducible;
# glob itself returns entries in arbitrary, filesystem-dependent order.
for directory_path in sorted(glob.glob("data/*")):
    # Only class folders contain images; ignore stray files next to them.
    if not os.path.isdir(directory_path):
        continue

    # basename() uses the native path separator, so the label is the bare
    # folder name on Windows *and* on Linux/Colab (splitting on "\\" only
    # worked on Windows and left "data/<class>" as the label elsewhere).
    label = os.path.basename(os.path.normpath(directory_path))
    print(label)

    for img_path in sorted(glob.glob(os.path.join(directory_path, "*.jpg"))):
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # cv2.imread returns None (it does not raise) for unreadable or
            # corrupt files; resizing None would abort the whole load.
            print("Skipping unreadable image:", img_path)
            continue

        # OpenCV decodes to BGR channel order, while the
        # vgg19.preprocess_input step applied later expects RGB input
        # (it performs the RGB -> BGR conversion itself).
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (SIZE, SIZE))
        images.append(img)
        labels.append(label)

if not images:
    # Fail here with a clear message instead of much later inside
    # train_test_split / VGG19.
    raise FileNotFoundError('No readable .jpg images found under "data/*"')

# Convert lists to arrays
images = np.array(images)
labels = np.array(labels)


In [None]:
# Draw a fresh seed in [1, 2**32 - 1] for this run; it drives the train/test
# split and the final SVC, and is printed with the scores so that a run can
# be reproduced.
Seed_0 = random.randrange(1, 2**32)

In [None]:
# Splitting the dataset: 80 % for training, 20 % held out for testing.
# stratify=labels keeps the class proportions identical in both splits, so
# the class-sensitive metrics reported later (recall, specificity, MCC) are
# not distorted by an unlucky draw, and both classes are guaranteed to be
# present in the test split.
X_train, X_test, y_train, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=Seed_0,
    stratify=labels,
)

In [None]:
# Encode labels from text to integers.
#
# The encoder is fitted ONCE, on the training labels, and that single mapping
# is applied to both splits. Fitting it separately on y_test and y_train (as
# before) yields two independent mappings that only agree when both splits
# happen to contain exactly the same set of classes. With one mapping, an
# unseen test label raises a ValueError instead of being silently mis-encoded.
le = preprocessing.LabelEncoder()
le.fit(y_train)
train_labels_encoded = le.transform(y_train)
test_labels_encoded = le.transform(y_test)

print(train_labels_encoded)
len(train_labels_encoded)

# LabelEncoder numbers the classes in sorted order, hence:
#0 = Aprovechable
#1 = No_Aprovechable

# Preprocessing is needed

Note: each Keras Application expects a specific kind of input preprocessing. For VGG19, call `tf.keras.applications.vgg19.preprocess_input` on your inputs before passing them to the model. vgg19.preprocess_input will convert the input images from RGB to BGR, then will zero-center each color channel with respect to the ImageNet dataset, without scaling.


Source: https://keras.io/api/applications/vgg/#vgg19-function

In [None]:
# Model inputs: VGG19-specific preprocessing of the pixels, plus the
# integer-encoded labels under the names used by the rest of the notebook.
# NOTE: X_train / X_test are overwritten with their preprocessed versions,
# so this cell must be run exactly once per kernel session.
vgg19_preprocess = tf.keras.applications.vgg19.preprocess_input

X_train, X_test = vgg19_preprocess(X_train), vgg19_preprocess(X_test)
Y_train, Y_test = train_labels_encoded, test_labels_encoded

In [None]:
# Loading the VGG19 model: ImageNet weights, convolutional base only
# (include_top=False leaves out the fully-connected classifier head).
# All other constructor arguments are left at their Keras defaults.
VGG19model = VGG19(include_top=False, weights="imagenet")

# Freeze every loaded layer: the network is used purely as a fixed feature
# extractor, so the pre-trained weights must stay untouched.
for frozen_layer in VGG19model.layers:
    frozen_layer.trainable = False

VGG19model.summary()  # trainable parameters should now be 0

In [None]:
# Use the outputs of the frozen convolutional network as SVM features.
# Each predicted feature map is flattened to one row vector per image.

# --- training split -> X_for_SVM -------------------------------------------
feature_extractor = VGG19model.predict(X_train)
features = np.reshape(feature_extractor, (feature_extractor.shape[0], -1))
X_for_SVM = features  # X input to the SVM

# --- test split -> X_test_features -----------------------------------------
X_test_feature = VGG19model.predict(X_test)
X_test_features = np.reshape(X_test_feature, (X_test_feature.shape[0], -1))


Source: https://scikit-learn.org/stable/modules/svm.html

In [None]:
# Hyperparameter Optimization
# Print the SVC reference documentation (the class docstring) to see which
# hyper-parameters are available for tuning.
svc_reference = svm.SVC.__doc__
print(svc_reference)

In [None]:
  # defining parameter range
# Exhaustive search over the polynomial-kernel SVC settings, scored by MCC.
#
# `probability` and `decision_function_shape` are deliberately NOT searched:
#   * `probability` only enables predict_proba (at extra fitting cost); it
#     does not change what predict() returns, so it cannot change the MCC.
#   * `decision_function_shape` is ignored by SVC for binary classification,
#     which is the case here (labels are encoded as 0 / 1).
# Including them multiplied the number of fits by 4 without ever changing a
# score.
param_grid1 = {
    'C': [0.01, 0.1, 1.0, 5, 10, 100, 1000],
    'class_weight': ["balanced", None],
    'degree': [1, 2, 3, 4, 5, 6],
    'gamma': ['scale', 'auto'],
    'kernel': ['poly'],
    'tol': [0.001, 0.0001, 0.00001],
}

grid1 = GridSearchCV(SVC(),
                     param_grid1,
                     refit = True,
                     verbose = 3,
                     scoring = MCC_scorer,
                     n_jobs = -1)  # use all cores, like the other large searches

# fitting the model for grid search
grid1.fit(X_for_SVM, Y_train)

# print best parameter after tuning
print(grid1.best_params_)

# print how our model looks after hyper-parameter tuning
print(grid1.best_estimator_)

#Print best score MCC
print('Best score: ', grid1.best_score_)

In [None]:
# Tune only the regularisation strength C (every other SVC setting at its
# default), scoring each candidate with the MCC scorer.
c_candidates = [0.1, 1.0, 5, 10, 100, 1000]
param_grid2 = dict(C=c_candidates)

grid2 = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid2,
    scoring=MCC_scorer,
    refit=True,
    n_jobs=-1,
    verbose=3,
)

# Run the search on the extracted training features
grid2.fit(X_for_SVM, Y_train)

# Winning parameters, then the refitted estimator built from them
for report_item in (grid2.best_params_, grid2.best_estimator_):
    print(report_item)

# Best MCC score found by the search
print('Best score: ', grid2.best_score_)

In [None]:
# Keep the winning C so the following searches can build on it
Best_C = grid2.best_params_['C']
print('Best C = ', Best_C)
print(type(Best_C))

In [None]:
# With C fixed to Best_C, search numeric values of gamma, scored by MCC.
gamma_candidates = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
param_grid3 = dict(gamma=gamma_candidates)

grid3 = GridSearchCV(
    estimator=SVC(C=Best_C),
    param_grid=param_grid3,
    scoring=MCC_scorer,
    refit=True,
    n_jobs=-1,
    verbose=3,
)

# Run the search on the extracted training features
grid3.fit(X_for_SVM, Y_train)

# Winning parameters, then the refitted estimator built from them
for report_item in (grid3.best_params_, grid3.best_estimator_):
    print(report_item)

In [None]:
# Best MCC score reached by the numeric-gamma search (grid3)
print('Best score: ',grid3.best_score_)

In [None]:
# With C fixed to Best_C, compare the two built-in gamma heuristics
# ('auto' and 'scale'), scored by MCC.
param_grid4 = dict(gamma=['auto', 'scale'])

grid4 = GridSearchCV(
    estimator=SVC(C=Best_C),
    param_grid=param_grid4,
    scoring=MCC_scorer,
    refit=True,
    verbose=3,
)

# Run the search on the extracted training features
grid4.fit(X_for_SVM, Y_train)

# Winning parameters, then the refitted estimator built from them
for report_item in (grid4.best_params_, grid4.best_estimator_):
    print(report_item)

# Best MCC score found by the search
print('Best score: ', grid4.best_score_)

In [None]:
# Keep the winning gamma setting so the following searches can build on it
Best_gamma = grid4.best_params_['gamma']
print('Best gamma = ', Best_gamma)
print(type(Best_gamma))

In [None]:
# Polynomial kernel with Best_C and Best_gamma: search the polynomial
# degree from 1 to 6, scored by MCC.
param_grid5 = dict(degree=list(range(1, 7)))

poly_svc = SVC(C=Best_C, gamma=Best_gamma, kernel='poly')
grid5 = GridSearchCV(
    estimator=poly_svc,
    param_grid=param_grid5,
    scoring=MCC_scorer,
    refit=True,
    verbose=3,
)

# Run the search on the extracted training features
grid5.fit(X_for_SVM, Y_train)

# Winning parameters, then the refitted estimator built from them
for report_item in (grid5.best_params_, grid5.best_estimator_):
    print(report_item)

# Best MCC score found by the search
print('Best score: ', grid5.best_score_)

In [None]:
# With Best_C and Best_gamma fixed, compare the non-polynomial kernels,
# scored by MCC.
param_grid6 = dict(kernel=['linear', 'rbf', 'sigmoid'])

grid6 = GridSearchCV(
    estimator=SVC(C=Best_C, gamma=Best_gamma),
    param_grid=param_grid6,
    scoring=MCC_scorer,
    refit=True,
    verbose=3,
)

# Run the search on the extracted training features
grid6.fit(X_for_SVM, Y_train)

# Winning parameters, then the refitted estimator built from them
for report_item in (grid6.best_params_, grid6.best_estimator_):
    print(report_item)

# Best MCC score found by the search
print('Best score: ', grid6.best_score_)

In [None]:
# Keep the winning kernel so the final search can build on it
Best_kernel = grid6.best_params_['kernel']
print('Best kernel = ', Best_kernel)
print(type(Best_kernel))

In [None]:
# With C, gamma and kernel fixed to their best values, search the remaining
# settings: class weighting, decision-function shape and stopping tolerance.
param_grid7 = dict(
    class_weight=["balanced", None],
    decision_function_shape=['ovo', 'ovr'],
    tol=[0.001, 0.0001, 0.00001],
)

tuned_svc = SVC(C=Best_C, gamma=Best_gamma, kernel=Best_kernel)
grid7 = GridSearchCV(
    estimator=tuned_svc,
    param_grid=param_grid7,
    scoring=MCC_scorer,
    refit=True,
    verbose=3,
)

# Run the search on the extracted training features
grid7.fit(X_for_SVM, Y_train)

# Winning parameters, then the refitted estimator built from them
for report_item in (grid7.best_params_, grid7.best_estimator_):
    print(report_item)

# Best MCC score found by the search
print('Best score: ', grid7.best_score_)

# Best optimized classifier 
**SVC(C = 100, gamma = 'scale', decision_function_shape = 'ovo',class_weight = 'balanced',kernel ='rbf')**

In [None]:
# Build and train the final scikit-learn SVC.
# probability=True is required for predict_proba below; random_state reuses
# the seed that was used for the train/test split.
final_svc_settings = dict(
    C=100,
    gamma='scale',
    decision_function_shape='ovo',
    class_weight='balanced',
    kernel='rbf',
    probability=True,
    random_state=Seed_0,
)
SVM_clf = svm.SVC(**final_svc_settings)
SVM_clf.fit(X_for_SVM, Y_train)

# Predict on the test features: integer-encoded classes first, then the
# predicted probability of the class encoded as 1 (column 1 of predict_proba).
Y_predict = np.array(SVM_clf.predict(X_test_features))
predict_probabilities = SVM_clf.predict_proba(X_test_features)[:, 1]

# Map the integer predictions back to the original text labels
prediction_SVM = le.inverse_transform(Y_predict)

In [None]:
# Evaluate the final classifier on the test split.

# Confusion matrix on the text labels (rows = actual, columns = predicted).
# labels=le.classes_ pins the matrix to the encoder's class order, so it keeps
# its full shape even if a class is missing from the test split/predictions.
CM = sklearn.metrics.confusion_matrix(y_test, prediction_SVM, labels=le.classes_)

# The class in row/column 0 is treated as the positive class.
TP = CM[0][0]
FN = CM[0][1]
FP = CM[1][0]
TN = CM[1][1]

#Calculate F1 Score
F1 = ( ( 2*TP ) / ( 2*TP + FP + FN ) )

#Calculate Recall
Recall = ( TP / ( TP + FN ) )

#Calculate Specificity
Specificity = ( TN / ( TN + FP )  )

#Confusion Matrix - verify accuracy of each class
# Drawn on its OWN figure/axes: with the implicit pyplot state, the sample
# image plotted further down landed on the same axes and overwrote the heatmap.
CM = pd.DataFrame(CM,columns = ['Aprovechable','No Aprovechable'],index = ['Aprovechable','No Aprovechable'])
fig_cm, ax_cm = plt.subplots()
sns.heatmap(CM, annot=True, linewidth = 1.5, fmt ="d", ax = ax_cm)
ax_cm.set_xlabel('Predicted')
ax_cm.set_ylabel('Actual')

#Calculating MCC
MCC = sklearn.metrics.matthews_corrcoef(y_test, prediction_SVM)

print('=======================================')
print("|            Best Scores               |")
print('=======================================\n')
print('        Accuracy :     {:.4f}'.format(metrics.accuracy_score(y_test, prediction_SVM)))
print('        Recall :       {:.4f}'.format(Recall))
print('        Specificity :  {:.4f}'.format(Specificity))
print('        AUC :          {:.4f}'.format(sklearn.metrics.roc_auc_score(y_test, predict_probabilities )))
print('        F1 Score :     {:.4f}'.format(F1))
print('        MCC :          {:.4f}'.format(MCC))
print('        Seed:          ',Seed_0)

#Check results on one randomly selected test image
n = np.random.randint(0,X_test.shape[0])
img = X_test[n]
# Separate figure so the image does not share axes with the confusion matrix.
# NOTE(review): X_test was overwritten with VGG19-preprocessed values earlier
# in the notebook, so the colours shown here are not those of the original
# photo -- keep an unprocessed copy if a faithful preview is needed.
fig_img, ax_img = plt.subplots()
ax_img.imshow(img)
ax_img.set_title('Sample test image')
input_img = np.expand_dims(img, axis=0) #Expand dims so the input is (num images, x, y, c)
input_img_feature=VGG19model.predict(input_img)
input_img_features=input_img_feature.reshape(input_img_feature.shape[0], -1)
prediction_svm = SVM_clf.predict(input_img_features)[0]
prediction_svm = le.inverse_transform([prediction_svm])  #Reverse the label encoder to original name
print("The prediction for this image is: ", prediction_svm)
print("The actual label for this image is: ", y_test[n])

# Figures are rendered after the text output, confusion matrix first.
print("\n\n                     ▼ Confusion Matrix ▼")
print("   --------------------------------------------")



In [None]:
# ROC curve and AUC of the final classifier on the test split.
# Y_tf holds the scores handed to roc_curve: the predicted probability of
# the class encoded as 1, compared against the integer-encoded true labels.
Y_tf = predict_probabilities
roc_points = roc_curve(Y_test, Y_tf)
fpr, tpr, thresholds = roc_points

# Area under the (fpr, tpr) curve
AUC = auc(fpr, tpr)

In [None]:
#Show ROC-AUC Curve
def _draw_roc(figure_name, curve_label, plot_title, x_range=None, y_range=None):
    """Plot the ROC curve (fpr vs. tpr) with the chance diagonal on a new figure.

    figure_name -- label of the matplotlib figure to create
    curve_label -- legend entry for the classifier's curve
    plot_title  -- title shown above the axes
    x_range     -- optional (min, max) x-limits, applied before plotting
    y_range     -- optional (min, max) y-limits, applied before plotting
    """
    plt.figure(figure_name)
    if x_range is not None:
        plt.xlim(*x_range)
    if y_range is not None:
        plt.ylim(*y_range)
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level reference line
    plt.plot(fpr, tpr, label=curve_label, color='tab:orange')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.title(plot_title)
    plt.legend(loc='best')


# Full-range view
_draw_roc("ROC-AUC Curve",
          'VGG19 + SVC (area = {:.3f})'.format(AUC),
          'ROC curve')
plt.show()

# Zoom in view of the upper left corner.
_draw_roc('Zoomed - ROC-AUC Curve',
          'VGG19 + SVC(area = {:.3f})'.format(AUC),
          'ROC curve (zoomed in at top left)',
          x_range=(0, 0.4),
          y_range=(0.6, 1))

# Real Labels

In [None]:
# Integer-encoded true labels of the test split
Y_test

# Predicted Labels

In [None]:
# Integer-encoded classes predicted by the final SVC for the test features
SVM_clf.predict(X_test_features)

# Predicted Probabilities

In [None]:
# Predicted probability of the class encoded as 1 for each test image
# (column 1 of SVM_clf.predict_proba)
predict_probabilities