# Xception Feature Extraction

Transfer Learning with Keras and Deep Learning
https://pyimagesearch.com/2019/05/20/transfer-learning-with-keras-and-deep-learning/

Keras: Feature extraction on large datasets with Deep Learning  
https://pyimagesearch.com/2019/05/27/keras-feature-extraction-on-large-datasets-with-deep-learning/

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
from imutils import paths

import tensorflow as tf
from tensorflow import keras 
from keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.preprocessing import LabelEncoder, LabelBinarizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

import warnings
warnings.filterwarnings("ignore")

In [2]:
## project root -- defaults to the original workstation folder, but can be
## redirected by setting the FYP_WORK_DIR environment variable
WORK_DIR = os.environ.get("FYP_WORK_DIR", "C:/Users/noqui/Desktop/FYP/Work_Folder")

## path to image dataset (one sub-folder per split, see TRAIN_DIR / TEST_DIR)
BASE_DIR = f"{WORK_DIR}/dataset/cassava"
TRAIN_DIR = "train"
TEST_DIR = "test"

## path to loading model
MODEL_PATH = f"{WORK_DIR}/output/model"

## output paths
FEATURES_OUTPUT_PATH = f"{WORK_DIR}/output/features"
LE_OUTPUT_PATH = f"{WORK_DIR}/output/encoder"
MODEL_OUTPUT_PATH = f"{WORK_DIR}/output/model"

## make sure every output folder exists; exist_ok avoids the separate
## "does it exist?" check and makes re-running this cell harmless
for path in [FEATURES_OUTPUT_PATH, LE_OUTPUT_PATH, MODEL_OUTPUT_PATH]:
    os.makedirs(path, exist_ok = True)

## essential parameters
BATCH_SIZE = 32   # images per forward pass during feature extraction
DIM = (224, 224)  # size every image is resized to when it is loaded

## Feature Extraction

In [3]:
### Function to extract features (plus two small private helpers)

def _classLabelFromPath(imagePath, splitDir):
    """Return the first path component of `imagePath` below `splitDir` (the class folder name)."""
    # relpath normalises the separators, so this does not depend on how the
    # base directory was spelled ("/" vs "\\") or on how deep it is nested
    return os.path.relpath(imagePath, splitDir).split(os.path.sep)[0]


def _dumpPickle(obj, filePath):
    """Pickle `obj` to `filePath`, closing the file handle when done."""
    with open(filePath, "wb") as fh:
        pickle.dump(obj, fh)


def extractFeatures(model, output_shape, model_name):
    """Extract features for the train and test splits and pickle them to disk.

    For each split folder under BASE_DIR, every image is loaded at size DIM,
    passed through the global `preprocess_input`, and fed to `model` in batches
    of BATCH_SIZE. Each prediction is flattened to `output_shape` values.

    Parameters
    ----------
    model        : Keras model for feature extractor
    output_shape : Number of values per image after flattening the model output
    model_name   : Name of the model; lower-cased and used as the file-name prefix

    Side effects
    ------------
    Writes `<model_name>_<split>_features.pkl`, `..._labels.pkl` and
    `..._imagepaths.pkl` into FEATURES_OUTPUT_PATH for both splits, and
    `encoder.pkl` (a LabelEncoder fitted on the first split's labels) into
    LE_OUTPUT_PATH.

    Notes
    -----
    `preprocess_input` is looked up as a global at call time, so it has to be
    imported before this function is called.
    """
    le = None
    for split_type in [TRAIN_DIR, TEST_DIR]:

        # grab all image paths in the current path
        print(f"[INFO] processing {split_type} split...")
        path = f"{BASE_DIR}/{split_type}"
        imagePaths = list(paths.list_images(path)) # unlike os.listdir, this grabs all images from subfolders

        # the class label is the folder directly below the split directory
        # (the paths are kept in listing order; nothing is shuffled here)
        labels = [_classLabelFromPath(p, path) for p in imagePaths]
        allFeatures = [] # to be appended

        # fit the label encoder once (on the first split, i.e. TRAIN_DIR)
        if le is None:
            le = LabelEncoder()
            le.fit(labels)

        # the batch count only depends on the split size, so compute it once
        totalBatches = int(np.ceil(len(imagePaths) / float(BATCH_SIZE)))

        # loop over the images in batches
        for (b, i) in enumerate(range(0, len(imagePaths), BATCH_SIZE)):
            print(f"[INFO] processing batch {b+1}/{totalBatches}")
            batchPaths = imagePaths[i:i + BATCH_SIZE]
            batchImages = []

            for imagePath in batchPaths:
                # load the input image using the Keras helper utility
                # while ensuring the image is resized to DIM
                image = load_img(imagePath, target_size = DIM)
                image = img_to_array(image)

                # (1) add a leading batch axis and
                # (2) apply the model-specific input preprocessing
                image = np.expand_dims(image, axis = 0)
                image = preprocess_input(image)

                batchImages.append(image)

            # pass the images through the network and flatten each output
            # volume into a single feature vector per image
            batchImages = np.vstack(batchImages)
            features = model.predict(batchImages, batch_size = BATCH_SIZE)
            features = features.reshape((features.shape[0], output_shape))

            # one row per image
            allFeatures.extend(features)

        allFeatures = np.array(allFeatures)
        labels = np.array(labels)

        # dump features, labels, and imagePaths
        prefix = f"{FEATURES_OUTPUT_PATH}/{model_name.lower()}_{split_type}"
        _dumpPickle(allFeatures, f"{prefix}_features.pkl")
        _dumpPickle(labels, f"{prefix}_labels.pkl")
        _dumpPickle(imagePaths, f"{prefix}_imagepaths.pkl")

    # serialize the label encoder to disk
    _dumpPickle(le, f"{LE_OUTPUT_PATH}/encoder.pkl")

In [4]:
from tensorflow.keras.applications import Xception
from tensorflow.keras.applications.xception import preprocess_input

# Xception with imagenet as weights, without its classification top.
# The spatial input size comes from DIM -- the same constant extractFeatures
# passes to load_img -- so the network input and the loaded images cannot
# drift apart if DIM is changed.
xception_imagenet_fe = Xception(weights = "imagenet", input_shape = (*DIM, 3), include_top = False)
xception_imagenet_fe.trainable = True
xception_imagenet_fe.summary()

Model: "xception"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 block1_conv1 (Conv2D)          (None, 111, 111, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 block1_conv1_bn (BatchNormaliz  (None, 111, 111, 32  128        ['block1_conv1[0][0]']           
 ation)                         )                                                          

 block4_sepconv1_bn (BatchNorma  (None, 28, 28, 728)  2912       ['block4_sepconv1[0][0]']        
 lization)                                                                                        
                                                                                                  
 block4_sepconv2_act (Activatio  (None, 28, 28, 728)  0          ['block4_sepconv1_bn[0][0]']     
 n)                                                                                               
                                                                                                  
 block4_sepconv2 (SeparableConv  (None, 28, 28, 728)  536536     ['block4_sepconv2_act[0][0]']    
 2D)                                                                                              
                                                                                                  
 block4_sepconv2_bn (BatchNorma  (None, 28, 28, 728)  2912       ['block4_sepconv2[0][0]']        
 lization)

 n)                                                                                               
                                                                                                  
 block7_sepconv1 (SeparableConv  (None, 14, 14, 728)  536536     ['block7_sepconv1_act[0][0]']    
 2D)                                                                                              
                                                                                                  
 block7_sepconv1_bn (BatchNorma  (None, 14, 14, 728)  2912       ['block7_sepconv1[0][0]']        
 lization)                                                                                        
                                                                                                  
 block7_sepconv2_act (Activatio  (None, 14, 14, 728)  0          ['block7_sepconv1_bn[0][0]']     
 n)                                                                                               
          

 block9_sepconv3_bn (BatchNorma  (None, 14, 14, 728)  2912       ['block9_sepconv3[0][0]']        
 lization)                                                                                        
                                                                                                  
 add_7 (Add)                    (None, 14, 14, 728)  0           ['block9_sepconv3_bn[0][0]',     
                                                                  'add_6[0][0]']                  
                                                                                                  
 block10_sepconv1_act (Activati  (None, 14, 14, 728)  0          ['add_7[0][0]']                  
 on)                                                                                              
                                                                                                  
 block10_sepconv1 (SeparableCon  (None, 14, 14, 728)  536536     ['block10_sepconv1_act[0][0]']   
 v2D)     

                                                                                                  
 block12_sepconv3_act (Activati  (None, 14, 14, 728)  0          ['block12_sepconv2_bn[0][0]']    
 on)                                                                                              
                                                                                                  
 block12_sepconv3 (SeparableCon  (None, 14, 14, 728)  536536     ['block12_sepconv3_act[0][0]']   
 v2D)                                                                                             
                                                                                                  
 block12_sepconv3_bn (BatchNorm  (None, 14, 14, 728)  2912       ['block12_sepconv3[0][0]']       
 alization)                                                                                       
                                                                                                  
 add_10 (A

In [5]:
# Read the transfer-learned Xception model back from MODEL_PATH
xception_pretrained = keras.models.load_model(f"{MODEL_PATH}/Xception.h5")

# Build the feature extractor: same inputs, but the output is taken from the
# fourth layer from the end, which drops the last three layers
# (presumably the fully connected head -- confirm against the saved model)
xception_pretrained_fe = Model(
    inputs = xception_pretrained.inputs,
    outputs = xception_pretrained.layers[-4].output,
)
xception_pretrained_fe.trainable = True
xception_pretrained_fe.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_4 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 block1_conv1 (Conv2D)          (None, 111, 111, 32  864         ['input_4[0][0]']                
                                )                                                                 
                                                                                                  
 block1_conv1_bn (BatchNormaliz  (None, 111, 111, 32  128        ['block1_conv1[0][0]']           
 ation)                         )                                                             

 block4_sepconv1_bn (BatchNorma  (None, 28, 28, 728)  2912       ['block4_sepconv1[0][0]']        
 lization)                                                                                        
                                                                                                  
 block4_sepconv2_act (Activatio  (None, 28, 28, 728)  0          ['block4_sepconv1_bn[0][0]']     
 n)                                                                                               
                                                                                                  
 block4_sepconv2 (SeparableConv  (None, 28, 28, 728)  536536     ['block4_sepconv2_act[0][0]']    
 2D)                                                                                              
                                                                                                  
 block4_sepconv2_bn (BatchNorma  (None, 28, 28, 728)  2912       ['block4_sepconv2[0][0]']        
 lization)

 n)                                                                                               
                                                                                                  
 block7_sepconv1 (SeparableConv  (None, 14, 14, 728)  536536     ['block7_sepconv1_act[0][0]']    
 2D)                                                                                              
                                                                                                  
 block7_sepconv1_bn (BatchNorma  (None, 14, 14, 728)  2912       ['block7_sepconv1[0][0]']        
 lization)                                                                                        
                                                                                                  
 block7_sepconv2_act (Activatio  (None, 14, 14, 728)  0          ['block7_sepconv1_bn[0][0]']     
 n)                                                                                               
          

 block9_sepconv3_bn (BatchNorma  (None, 14, 14, 728)  2912       ['block9_sepconv3[0][0]']        
 lization)                                                                                        
                                                                                                  
 add_7 (Add)                    (None, 14, 14, 728)  0           ['block9_sepconv3_bn[0][0]',     
                                                                  'add_6[0][0]']                  
                                                                                                  
 block10_sepconv1_act (Activati  (None, 14, 14, 728)  0          ['add_7[0][0]']                  
 on)                                                                                              
                                                                                                  
 block10_sepconv1 (SeparableCon  (None, 14, 14, 728)  536536     ['block10_sepconv1_act[0][0]']   
 v2D)     

                                                                                                  
 block12_sepconv3_act (Activati  (None, 14, 14, 728)  0          ['block12_sepconv2_bn[0][0]']    
 on)                                                                                              
                                                                                                  
 block12_sepconv3 (SeparableCon  (None, 14, 14, 728)  536536     ['block12_sepconv3_act[0][0]']   
 v2D)                                                                                             
                                                                                                  
 block12_sepconv3_bn (BatchNorm  (None, 14, 14, 728)  2912       ['block12_sepconv3[0][0]']       
 alization)                                                                                       
                                                                                                  
 add_10 (A

In [6]:
# extracting features with pretrained weights
model_name = "Cassava_Dataset_Xception_pretrained"

# The flattened feature length is read from the extractor's own output shape
# (batch axis dropped) instead of hard-coding 7 * 7 * 6, so it stays correct
# if the cut layer or the input size changes.
flat_dim = int(np.prod(xception_pretrained_fe.output_shape[1:]))
extractFeatures(xception_pretrained_fe, output_shape = flat_dim, model_name = model_name)

[INFO] processing train split...
[INFO] processing batch 1/469
[INFO] processing batch 2/469
[INFO] processing batch 3/469
[INFO] processing batch 4/469
[INFO] processing batch 5/469
[INFO] processing batch 6/469
[INFO] processing batch 7/469
[INFO] processing batch 8/469
[INFO] processing batch 9/469
[INFO] processing batch 10/469
[INFO] processing batch 11/469
[INFO] processing batch 12/469
[INFO] processing batch 13/469
[INFO] processing batch 14/469
[INFO] processing batch 15/469
[INFO] processing batch 16/469
[INFO] processing batch 17/469
[INFO] processing batch 18/469
[INFO] processing batch 19/469
[INFO] processing batch 20/469
[INFO] processing batch 21/469
[INFO] processing batch 22/469
[INFO] processing batch 23/469
[INFO] processing batch 24/469
[INFO] processing batch 25/469
[INFO] processing batch 26/469
[INFO] processing batch 27/469
[INFO] processing batch 28/469
[INFO] processing batch 29/469
[INFO] processing batch 30/469
[INFO] processing batch 31/469
[INFO] processi

[INFO] processing batch 202/469
[INFO] processing batch 203/469
[INFO] processing batch 204/469
[INFO] processing batch 205/469
[INFO] processing batch 206/469
[INFO] processing batch 207/469
[INFO] processing batch 208/469
[INFO] processing batch 209/469
[INFO] processing batch 210/469
[INFO] processing batch 211/469
[INFO] processing batch 212/469
[INFO] processing batch 213/469
[INFO] processing batch 214/469
[INFO] processing batch 215/469
[INFO] processing batch 216/469
[INFO] processing batch 217/469
[INFO] processing batch 218/469
[INFO] processing batch 219/469
[INFO] processing batch 220/469
[INFO] processing batch 221/469
[INFO] processing batch 222/469
[INFO] processing batch 223/469
[INFO] processing batch 224/469
[INFO] processing batch 225/469
[INFO] processing batch 226/469
[INFO] processing batch 227/469
[INFO] processing batch 228/469
[INFO] processing batch 229/469
[INFO] processing batch 230/469
[INFO] processing batch 231/469
[INFO] processing batch 232/469
[INFO] p

[INFO] processing batch 302/469
[INFO] processing batch 303/469
[INFO] processing batch 304/469
[INFO] processing batch 305/469
[INFO] processing batch 306/469
[INFO] processing batch 307/469
[INFO] processing batch 308/469
[INFO] processing batch 309/469
[INFO] processing batch 310/469
[INFO] processing batch 311/469
[INFO] processing batch 312/469
[INFO] processing batch 313/469
[INFO] processing batch 314/469
[INFO] processing batch 315/469
[INFO] processing batch 316/469
[INFO] processing batch 317/469
[INFO] processing batch 318/469
[INFO] processing batch 319/469
[INFO] processing batch 320/469
[INFO] processing batch 321/469
[INFO] processing batch 322/469
[INFO] processing batch 323/469
[INFO] processing batch 324/469
[INFO] processing batch 325/469
[INFO] processing batch 326/469
[INFO] processing batch 327/469
[INFO] processing batch 328/469
[INFO] processing batch 329/469
[INFO] processing batch 330/469
[INFO] processing batch 331/469
[INFO] processing batch 332/469
[INFO] p

[INFO] processing batch 402/469
[INFO] processing batch 403/469
[INFO] processing batch 404/469
[INFO] processing batch 405/469
[INFO] processing batch 406/469
[INFO] processing batch 407/469
[INFO] processing batch 408/469
[INFO] processing batch 409/469
[INFO] processing batch 410/469
[INFO] processing batch 411/469
[INFO] processing batch 412/469
[INFO] processing batch 413/469
[INFO] processing batch 414/469
[INFO] processing batch 415/469
[INFO] processing batch 416/469
[INFO] processing batch 417/469
[INFO] processing batch 418/469
[INFO] processing batch 419/469
[INFO] processing batch 420/469
[INFO] processing batch 421/469
[INFO] processing batch 422/469
[INFO] processing batch 423/469
[INFO] processing batch 424/469
[INFO] processing batch 425/469
[INFO] processing batch 426/469
[INFO] processing batch 427/469
[INFO] processing batch 428/469
[INFO] processing batch 429/469
[INFO] processing batch 430/469
[INFO] processing batch 431/469
[INFO] processing batch 432/469
[INFO] p

[INFO] processing batch 33/201
[INFO] processing batch 34/201
[INFO] processing batch 35/201
[INFO] processing batch 36/201
[INFO] processing batch 37/201
[INFO] processing batch 38/201
[INFO] processing batch 39/201
[INFO] processing batch 40/201
[INFO] processing batch 41/201
[INFO] processing batch 42/201
[INFO] processing batch 43/201
[INFO] processing batch 44/201
[INFO] processing batch 45/201
[INFO] processing batch 46/201
[INFO] processing batch 47/201
[INFO] processing batch 48/201
[INFO] processing batch 49/201
[INFO] processing batch 50/201
[INFO] processing batch 51/201
[INFO] processing batch 52/201
[INFO] processing batch 53/201
[INFO] processing batch 54/201
[INFO] processing batch 55/201
[INFO] processing batch 56/201
[INFO] processing batch 57/201
[INFO] processing batch 58/201
[INFO] processing batch 59/201
[INFO] processing batch 60/201
[INFO] processing batch 61/201
[INFO] processing batch 62/201
[INFO] processing batch 63/201
[INFO] processing batch 64/201
[INFO] p

[INFO] processing batch 134/201
[INFO] processing batch 135/201
[INFO] processing batch 136/201
[INFO] processing batch 137/201
[INFO] processing batch 138/201
[INFO] processing batch 139/201
[INFO] processing batch 140/201
[INFO] processing batch 141/201
[INFO] processing batch 142/201
[INFO] processing batch 143/201
[INFO] processing batch 144/201
[INFO] processing batch 145/201
[INFO] processing batch 146/201
[INFO] processing batch 147/201
[INFO] processing batch 148/201
[INFO] processing batch 149/201
[INFO] processing batch 150/201
[INFO] processing batch 151/201
[INFO] processing batch 152/201
[INFO] processing batch 153/201
[INFO] processing batch 154/201
[INFO] processing batch 155/201
[INFO] processing batch 156/201
[INFO] processing batch 157/201
[INFO] processing batch 158/201
[INFO] processing batch 159/201
[INFO] processing batch 160/201
[INFO] processing batch 161/201
[INFO] processing batch 162/201
[INFO] processing batch 163/201
[INFO] processing batch 164/201
[INFO] p

## Preparing the Data for Train, Validation and Test

Loading generated image features and splitting them into train, validation and test.

The features extracted from the `train` split are divided into the training and validation sets.  
The features extracted from the `test` split are used for testing.

Naming Convention

| Data Split | X         | y         | y (one-hot-encoded) |
| :--------- | :-------- | :-------- | :------------------ |
| Training   | `X_train` | `y_train` | `Y_train`           |
| Validation | `X_val`   | `y_val`   | `Y_val`             |
| Testing    | `X_test`  | `y_test`  | `Y_test`            |

In [4]:
# functions to load features, labels and imagepath from the extraction
def _loadPickle(filePath):
    """Unpickle and return the object stored at `filePath`, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code -- only load files that were
    # written by this notebook's own extraction step.
    with open(filePath, "rb") as fh:
        return pickle.load(fh)


def getDataAndImagePaths(model_name, split_type):
    """Load the pickled features, labels and image paths of one split.

    Parameters
    ----------
    model_name : Name of the model; lower-cased to form the file-name prefix
    split_type : Name of the split whose files are read (e.g. "train" or "test")

    Returns
    -------
    (X, y, imagePaths) : the objects stored in `<prefix>_features.pkl`,
        `<prefix>_labels.pkl` and `<prefix>_imagepaths.pkl`, where the prefix is
        `FEATURES_OUTPUT_PATH/<model_name.lower()>_<split_type>`.
    """
    prefix = f"{FEATURES_OUTPUT_PATH}/{model_name.lower()}_{split_type}"
    X = _loadPickle(f"{prefix}_features.pkl")
    y = _loadPickle(f"{prefix}_labels.pkl")
    imagePaths = _loadPickle(f"{prefix}_imagepaths.pkl")

    return (X, y, imagePaths)

## Xception with pretrained weights

In [5]:
model_name = "Cassava_Dataset_Xception_pretrained"

# read the features, labels and image paths saved for the train split
print(f"[INFO] loading training data for {model_name}...")
data, labels, imagePaths = getDataAndImagePaths(model_name, split_type = "train")

# keep 70% of the train split for training and hold out 30% for validation
# (fixed random_state so the partition is the same on every run)
print("[INFO] splitting the datasets into train and validation...")
X_train, X_val, y_train, y_val = train_test_split(
    data,
    labels,
    test_size = 0.3,
    random_state = 42,
)

# read the features, labels and image paths saved for the test split
print(f"[INFO] loading testing data for {model_name}...")
X_test, y_test, imagePaths_test = getDataAndImagePaths(model_name, split_type = "test")
print("[INFO] data loaded...")

[INFO] loading training data for Cassava_Dataset_Xception_pretrained...
[INFO] splitting the datasets into train and validation...
[INFO] loading testing data for Cassava_Dataset_Xception_pretrained...
[INFO] data loaded...


In [6]:
# one-hot encode the class labels: the binarizer learns the classes from the
# training labels only and is then applied unchanged to all three sets
lb = LabelBinarizer()
lb.fit(y_train)
Y_train, Y_val, Y_test = (lb.transform(y) for y in (y_train, y_val, y_test))

In [7]:
# print the train / validation / test shapes of, in order:
# the features, the raw labels, and the one-hot-encoded labels
shape_groups = [
    (X_train, X_val, X_test),
    (y_train, y_val, y_test),
    (Y_train, Y_val, Y_test),
]

for group_idx, group in enumerate(shape_groups):
    # a blank line separates consecutive groups
    if group_idx > 0:
        print()
    for arr in group:
        print(arr.shape)

(10486, 294)
(4494, 294)
(6417, 294)

(10486,)
(4494,)
(6417,)

(10486, 5)
(4494, 5)
(6417, 5)


### Neural Network Classifier

In [8]:
from keras.models import Sequential
from keras.layers.core import Dense
from keras.layers import Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

In [9]:
# length of one feature vector (second axis of X_train); the classifier
# defined below uses it as the size of its input layer
input_shape = X_train.shape[1]
print(input_shape)

294


In [10]:
# seed TensorFlow's global random generator so that the weight initialisation
# and dropout masks are repeatable from run to run
tf.random.set_seed(42)

# the width of the output layer follows the one-hot labels instead of a
# hard-coded 5, so it cannot disagree with the encoded targets
num_classes = Y_train.shape[1]

# fully connected classifier on top of the extracted features:
# two Dense(512) blocks with batch normalisation and dropout, then softmax
model = Sequential()
model.add(Dense(512, input_shape = (input_shape,), activation = "relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(512, activation = "relu"))
model.add(BatchNormalization())
model.add(Dropout(0.25))
model.add(Dense(num_classes, activation = "softmax"))

In [11]:
# training hyper-parameters for the classifier
# NOTE: this rebinds BATCH_SIZE, which the configuration cell set to 32 and
# which extractFeatures reads as a global -- calling extractFeatures after
# this cell would therefore use batches of 16
EPOCHS, BATCH_SIZE = 40, 16

model.compile(
    optimizer = Adam(learning_rate = 0.001),
    loss = "categorical_crossentropy",
    metrics = ["accuracy"],
)

In [12]:
# Settings shared by both callbacks: watch <val_accuracy> (higher is better)
# and print a message whenever the callback fires
watch_val_accuracy = {"monitor": 'val_accuracy', "mode": 'max', "verbose": 1}

# Multiply the learning rate by <factor> when <val_accuracy> has not improved by
# at least <min_delta> for <patience> epochs, without going below <min_lr>
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    min_delta = 0.01,
    patience = 3,
    factor = 0.25,
    cooldown = 0,
    min_lr = 0.0001,
    **watch_val_accuracy,
)

# Stop training when <val_accuracy> has not improved by at least <min_delta>
# for <patience> epochs, then roll back to the best weights seen
early_stopper = tf.keras.callbacks.EarlyStopping(
    min_delta = 0.005,
    patience = 10,
    restore_best_weights = True,
    **watch_val_accuracy,
)

# train the neural network, validating on the held-out validation set
his_fe = model.fit(
    X_train,
    Y_train,
    validation_data = (X_val, Y_val),
    epochs = EPOCHS,
    batch_size = BATCH_SIZE,
    callbacks = [early_stopper, reduce_lr],
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 4: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0001.
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 17: early stopping


In [13]:
# evaluate on the validation set (X_val / Y_val); with the single "accuracy"
# metric the model was compiled with, score is [loss, accuracy]
score = model.evaluate(X_val, Y_val, verbose = 1)
# the label names the data that was actually evaluated (validation, not test)
print('Validation accuracy:', score[1])

Test accuracy: 0.722741425037384


In [17]:
# per-class scores for the test set
predy = model.predict(X_test)

# most likely class per sample vs. the position of the 1 in each one-hot row
pred = predy.argmax(axis = 1)
ground = Y_test.argmax(axis = 1)

print(classification_report(ground, pred))

              precision    recall  f1-score   support

           0       0.42      0.30      0.35       326
           1       0.57      0.46      0.51       656
           2       0.70      0.26      0.38       715
           3       0.79      0.96      0.86      3947
           4       0.51      0.39      0.44       773

    accuracy                           0.73      6417
   macro avg       0.60      0.47      0.51      6417
weighted avg       0.70      0.73      0.70      6417



In [18]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [19]:
# linear-kernel support vector classifier on the extracted features
clf_SV = SVC(kernel = 'linear', probability = True)
clf_SV = clf_SV.fit(X_train, np.ravel(y_train))
y_pred = clf_SV.predict(X_test)

# the test accuracy is computed from y_pred, so the test set is predicted only
# once instead of a second time inside score()
print("Accuracy on training set: {:.5f}".format(clf_SV.score(X_train, y_train)))
print("Accuracy on test set:     {:.5f}".format(np.mean(y_pred == np.ravel(y_test))))

Accuracy on training set: 0.83797
Accuracy on test set:     0.72059


In [25]:
# k-nearest-neighbours classifier (k = 6) on the extracted features
clf_KNN = KNeighborsClassifier(n_neighbors = 6)
clf_KNN = clf_KNN.fit(X_train, np.ravel(y_train))
y_pred = clf_KNN.predict(X_test)

# the test accuracy is computed from y_pred, so the test set is predicted only
# once instead of a second time inside score()
print("Accuracy on training set: {:.5f}".format(clf_KNN.score(X_train, y_train)))
print("Accuracy on test set:     {:.5f}".format(np.mean(y_pred == np.ravel(y_test))))

Accuracy on training set: 0.83092
Accuracy on test set:     0.70079


In [32]:
# depth-limited decision tree (entropy criterion) on the extracted features;
# random_state is fixed, like in the random forest cell, so repeated runs
# build the same tree
clf_DT = DecisionTreeClassifier(criterion = 'entropy', max_depth = 5, splitter = 'best', random_state = 20)
clf_DT = clf_DT.fit(X_train, np.ravel(y_train))
y_pred = clf_DT.predict(X_test)

# the test accuracy is computed from y_pred, so the test set is predicted only
# once instead of a second time inside score()
print("Accuracy on training set: {:.5f}".format(clf_DT.score(X_train, y_train)))
print("Accuracy on test set:     {:.5f}".format(np.mean(y_pred == np.ravel(y_test))))

Accuracy on training set: 0.77303
Accuracy on test set:     0.69238


In [37]:
# depth-limited random forest (fixed seed) on the extracted features
clf_RF = RandomForestClassifier(max_depth = 5, random_state = 20)
clf_RF = clf_RF.fit(X_train, np.ravel(y_train))
y_pred = clf_RF.predict(X_test)

# the test accuracy is computed from y_pred, so the test set is predicted only
# once instead of a second time inside score()
print("Accuracy on training set: {:.5f}".format(clf_RF.score(X_train, y_train)))
print("Accuracy on test set:     {:.5f}".format(np.mean(y_pred == np.ravel(y_test))))

Accuracy on training set: 0.79973
Accuracy on test set:     0.71498
