**IMAGE PREPROCESSING**

**IMPORT THE NECESSARY LIBRARIES**

In [26]:
from sklearn.ensemble import RandomForestClassifier

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix
from google.colab.patches import cv2_imshow

from skimage import feature 
from imutils import build_montages 
from imutils import paths

import numpy as np
import cv2

import os 
import pickle


PATH FOR TRAIN AND TEST DATA

In [10]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
trainingpath=r"/content/drive/My Drive/dataset/spiral/training"  
testingpath=r"/content/drive/My Drive/dataset/spiral/testing"

QUANTIFYING IMAGES

In [18]:
def quantify_image(image):
    features = feature.hog(image, orientations=9,
                           pixels_per_cell=(10, 10), 
                           cells_per_block=(2, 2),
                           transform_sqrt=True, 
                           block_norm="L1")
    return features

LOADING TRAIN DATA AND TEST DATA

In [19]:
def load_split(path):
  #grab the list of img in the input directory, the initialize the list of data images and class labels
  imagePaths = list(paths.list_images (path))
  data=[]
  labels=[]
  #loop over the image paths 
  for imagePath in imagePaths:
    #extract the class label from the filename 
    label = imagePath.split(os.path.sep) [-2]
    #load the input image, convert it to grayscale, and resize # it to 200x200 pixels, ignoring aspect ratio
    image = cv2.imread(imagePath)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = cv2.resize(image, (200, 200))
    #threshold the image such that the drawing appears as white on a black background
    image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV | cv2. THRESH_OTSU) [1]
    features = quantify_image(image) # quantify the image 
    data.append(features)
    labels.append(label)#update the data and labels lists, respectively 
  return (np.array(data), np.array(labels))

In [20]:
print("[INFO] loading data...")
(X_train, y_train) = load_split(trainingpath)
(X_test, y_test) = load_split(testingpath)


[INFO] loading data...


LABEL ENCODING

In [21]:
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)
print(X_train.shape,y_train.shape)


(72, 12996) (72,)


**MODEL BUILDING**

TRAINING THE MODEL

In [22]:
print("[INFO] training model")
model = RandomForestClassifier(n_estimators=100)
model.fit(X_train, y_train)

[INFO] training model


RandomForestClassifier()

TESTING THE MODEL

In [23]:
testingpath=list(paths.list_images(testingpath))
idxs=np.arange(0,len(testingpath))
idxs=np.random.choice(idxs,size=(25,),replace=False)
images=[]

In [33]:
from google.colab.patches import cv2_imshow
for i in idxs:
    image=cv2.imread(testingpath[i])
    output=image.copy()
        
    # load the input image,convert to grayscale and resize
    
    output=cv2.resize(output,(128,128))
    image=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
    image=cv2.resize(image,(200,200))
    image=cv2.threshold(image,0,255,cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    
    #quantify the image and make predictions based on the  extracted feature using last trained random forest
    features=quantify_image(image)
    preds=model.predict([features])
    label=le.inverse_transform(preds)[0]
    #the set of output images
    if label=="healthy":
        color=(0,255,0)
    else:
        color=(0,0,255)
        
    cv2.putText(output,label,(3,20),cv2.FONT_HERSHEY_SIMPLEX,0.5,color,2)
    images.append(output)

#creating a montage
montage=build_montages(images,(128,128),(5,5))[0]
cv2.waitKey(0)

-1

MODEL EVALUATION

In [31]:
predictions = model.predict(X_test)

cm = confusion_matrix(y_test, predictions).flatten()
print(cm)
(tn, fp, fn, tp) = cm
accuracy = (tp + tn) / float(cm.sum())
print(accuracy)

[14  1  3 12]
0.8666666666666667


SAVE THE MODEL

In [36]:
pickle.dump(model,open('parkinson.pkl','wb')) 