In [2]:
# Importing the Necessary Libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from skimage import feature 
from imutils import build_montages
from imutils import paths
import numpy as np
import cv2
import os
import pickle

In [3]:
def quantify_image(img):
    '''
    Describe an image by its histogram of oriented gradients (HOG).

    Parameters: img - image array handed straight to skimage's feature.hog
                      (callers in this notebook pass a 200x200 thresholded
                      grayscale image)

    Output: Returns the HOG feature vector produced by feature.hog
    '''

    # All HOG settings live in one mapping so they can be read at a glance.
    hog_settings = dict(
        orientations=9,
        pixels_per_cell=(10, 10),
        cells_per_block=(2, 2),
        transform_sqrt=True,
        block_norm="L1",
    )

    return feature.hog(image=img, **hog_settings)

In [4]:
def load_split(path):

    '''
    Build a labelled HOG feature set from a directory of images.

    Every image found under `path` is labelled with the name of the directory
    that directly contains it. Before feature extraction each image is
    preprocessed in this order: conversion to grayscale, resizing to 200x200,
    and inverted binary thresholding with Otsu's method. The thresholded image
    is then passed to quantify_image to obtain its HOG feature vector.

    Parameters: path - Image Directory Source path

    Returns:
    A tuple containing two numpy arrays:
    data - an array of hog features computed on images (one row per image)
    labels - an array of image labels, in the same order as data

    Raises:
    ValueError - if an image file cannot be read/decoded by cv2.imread

    '''

    # Sorted so that the row order is identical on every run and filesystem.
    imagePaths = sorted(paths.list_images(path))
    data = []
    labels = []
    for imagePath in imagePaths:
        # The class label is the name of the image's parent directory.
        label = os.path.basename(os.path.dirname(imagePath))
        image = cv2.imread(imagePath)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise ValueError("could not read image: {!r}".format(imagePath))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (200, 200))
        # threshold() returns (threshold_value, image); keep only the image.
        image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
        features = quantify_image(image)
        data.append(features)
        labels.append(label)

    return (np.array(data), np.array(labels))

In [5]:
# Loading Training and Testing data
# os.path.join keeps these paths valid on every OS; a hard-coded backslash is
# only a path separator on Windows.
trainingPath = os.path.join("dataset", "training")
testingPath = os.path.join("dataset", "testing")
# loading the training and testing data
print("[INFO] loading data...")
(X_train, y_train) = load_split(trainingPath)
(X_test, y_test) = load_split(testingPath)
# A wrong dataset path yields empty arrays; fail here with a clear message
# instead of much later inside the encoder or the classifier.
for splitPath, splitData in ((trainingPath, X_train), (testingPath, X_test)):
    if len(splitData) == 0:
        raise FileNotFoundError("no images found under {!r}".format(splitPath))
# Encoding the labels
le = LabelEncoder()
print(y_test)
# Fit the encoder on the training labels only, then reuse it for the test labels
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
print(X_train.shape, y_train.shape)

[INFO] loading data...
['healthy' 'healthy' 'healthy' 'healthy' 'healthy' 'healthy' 'healthy'
 'healthy' 'healthy' 'healthy' 'healthy' 'healthy' 'healthy' 'healthy'
 'healthy' 'healthy' 'healthy' 'healthy' 'healthy' 'healthy' 'healthy'
 'healthy' 'healthy' 'healthy' 'healthy' 'healthy' 'healthy' 'healthy'
 'healthy' 'healthy' 'parkinson' 'parkinson' 'parkinson' 'parkinson'
 'parkinson' 'parkinson' 'parkinson' 'parkinson' 'parkinson' 'parkinson'
 'parkinson' 'parkinson' 'parkinson' 'parkinson' 'parkinson' 'parkinson'
 'parkinson' 'parkinson' 'parkinson' 'parkinson' 'parkinson' 'parkinson'
 'parkinson' 'parkinson' 'parkinson' 'parkinson' 'parkinson' 'parkinson'
 'parkinson' 'parkinson']
(144, 12996) (144,)


In [6]:
# Display the training feature matrix returned by load_split (one row of HOG features per image)
X_train

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.24999804, 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.12675901, 0.        , 0.07381471, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [7]:
print("[INFO] training model")
# A fixed random_state makes the trained forest, and every metric computed
# from it further down, reproducible across kernel restarts.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

[INFO] training model


In [8]:
# Randomly select a few testing images and build annotated tiles for the montage
testingPaths = list(paths.list_images(testingPath))
if not testingPaths:
    raise FileNotFoundError("no images found under {!r}".format(testingPath))
# Sampling without replacement cannot draw more items than exist, so cap the
# sample at the number of available images (25 fills the 5x5 montage).
sampleSize = min(25, len(testingPaths))
idxs = np.arange(0, len(testingPaths))
idxs = np.random.choice(idxs, size=(sampleSize,), replace=False)
images = []

# loop over the sampled testing images
for i in idxs:
    # load the testing image, clone it, and resize the clone for display
    image = cv2.imread(testingPaths[i])
    # cv2.imread returns None (it does not raise) when a file cannot be read
    if image is None:
        raise ValueError("could not read image: {!r}".format(testingPaths[i]))
    output = image.copy()
    output = cv2.resize(output, (128, 128))
    # pre-process the image in the same manner as load_split does
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image, (200, 200))
    image = cv2.threshold(image, 0, 255,
                          cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    # quantify the image and predict its class with the trained Random Forest
    features = quantify_image(image)
    preds = model.predict([features])
    label = le.inverse_transform(preds)[0]
    # draw the colored class label on the output image (green for healthy,
    # red otherwise; OpenCV colors are BGR) and add it to the output images
    color = (0, 255, 0) if label == "healthy" else (0, 0, 255)
    cv2.putText(output, label, (3, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    images.append(output)
print(len(images))
# create a montage using 128x128 "tiles" with 5 rows and 5 columns
montage = build_montages(images, (128, 128), (5, 5))[0]
# show the output montage; waitKey(0) blocks until a key is pressed while the
# OpenCV window has focus, after which the window is closed explicitly
cv2.imshow("Output", montage)
cv2.waitKey(0)
cv2.destroyAllWindows()

25


KeyboardInterrupt: 

In [9]:
# make predictions on the testing data
predictions = model.predict(X_test)
# compute the confusion matrix and use it to derive the raw accuracy.
# labels=[0, 1] pins the matrix to 2x2 even when one class is absent from both
# y_test and predictions, so the four-way unpack below cannot fail.
# NOTE: assumes exactly two encoded classes (0 and 1), as in this dataset.
cm = confusion_matrix(y_test, predictions, labels=[0, 1]).flatten()
print(cm)
# flattened row-major order of a 2x2 confusion matrix
(tn, fp, fn, tp) = cm
accuracy = (tp + tn) / float(cm.sum())
print(accuracy)

[24  6  7 23]
0.7833333333333333


In [10]:
# Persist the trained model. Create the target directory if it is missing and
# use a context manager so the file handle is flushed and closed.
os.makedirs('./artifacts', exist_ok=True)
with open('./artifacts/parkinson.pkl', 'wb') as modelFile:
    pickle.dump(model, modelFile)