In [1]:
!pip install hiclass

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting hiclass
  Downloading hiclass-4.0.11-py3-none-any.whl (23 kB)
Collecting ray
  Downloading ray-1.13.0-cp37-cp37m-manylinux2014_x86_64.whl (54.5 MB)
[K     |████████████████████████████████| 54.5 MB 132 kB/s 
Collecting grpcio<=1.43.0,>=1.28.1
  Downloading grpcio-1.43.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)
[K     |████████████████████████████████| 4.1 MB 38.3 MB/s 
[?25hCollecting aiosignal
  Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)
Collecting virtualenv
  Downloading virtualenv-20.14.1-py2.py3-none-any.whl (8.8 MB)
[K     |████████████████████████████████| 8.8 MB 25.4 MB/s 
[?25hCollecting frozenlist
  Downloading frozenlist-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)
[K     |████████████████████████████████| 144 kB 26.8 MB/s 
Collecting platformdirs<3,>=2
  Dow

In [2]:
!pip install mahotas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mahotas
  Downloading mahotas-1.4.12-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (5.7 MB)
[K     |████████████████████████████████| 5.7 MB 5.4 MB/s 
Installing collected packages: mahotas
Successfully installed mahotas-1.4.12


In [4]:
import mahotas
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
from PIL import Image, ImageOps
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn import metrics
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from hiclass import LocalClassifierPerNode
from hiclass import LocalClassifierPerParentNode
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectKBest, chi2

In [5]:
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


### **Load Images and Extract TAS Descriptors**

In [6]:
image_dir = "/gdrive/My Drive/Project/Denoised_CLAHE_Cl3"

In [7]:
def load_data(tag='train'):
  tag_dir = os.path.join(image_dir, tag)
  tag_path = Path(tag_dir)
  data = [] # Images
  cat = [] # Category
  subcat = []
  patient = []
  file_name = []
  for mag_dir in tag_path.iterdir():  
    mag_label = mag_dir.stem
    print("*",mag_label)
    if mag_label == "40X":
      print("Got it!")
      for img_name in mag_dir.glob('*.png'):
        img_label = img_name.stem
        splitted_image_name = img_label.split('_')
        cat_label = splitted_image_name[1]
        remaining_part = splitted_image_name[2].split('-')
        subcat_label = remaining_part[0]
        patient_label = remaining_part[2]
        img = mahotas.imread(img_name.as_posix())
        img = img[:, :, 0]
        feature = mahotas.features.pftas(img)
        hi_label = [cat_label, subcat_label]
        data.append(feature) # append the feature to the data
        cat.append(hi_label) # append the label to the category
        subcat.append(subcat_label)
        patient.append(patient_label)
        file_name.append(img_label)
  return data, cat, subcat, patient, file_name

In [8]:
# train images
vec_train, cat_train, subcat_train, patient_train, image_name_train = load_data('train')
# test images
vec_test, cat_test, subcat_test, patient_test, image_name_test = load_data('test')

* 40X
Got it!
* 100X
* 200X
* 400X
* 40X
Got it!
* 100X
* 200X
* 400X


### **Function to Compute Patient Recognition Rate**

In [9]:
def evaluate_recognition_rate(prediction):
  unique_patient_test = list(np.unique(np.array(patient_test)))
  num_images_per_patient = []
  num_correctly_classified = []
  num = 0
  total_patient_score = 0
  num_patient = 0
  for patient in unique_patient_test:
    num_patient = num_patient + 1
    Np = 0
    Nrec = 0
    image_index_counter = -1
    for image_name in image_name_test:
      image_index_counter = image_index_counter + 1
      image_name_splitted = image_name.split('-')
      if patient == image_name_splitted[2]:
        first_part_splitted = image_name_splitted[0].split('_')
        image_class = first_part_splitted[2]
        if image_class == prediction[image_index_counter]:
          Nrec = Nrec + 1
        Np = Np + 1
    patient_score = Nrec/Np
    #print(patient_score)
    num_images_per_patient.append(Np)
    num_correctly_classified.append(Nrec)
    total_patient_score = total_patient_score + patient_score
    num = num + Np    
  recognition_rate = total_patient_score/num_patient
  print("Summation of patient score: ", total_patient_score)
  print("Total Number of Patients: ", num_patient)
  print("Recognition Rate: ", recognition_rate)

### **Function to Identify Predicted Classes and Subclasses**

In [10]:
def return_predicted_categories(predictions):
  pred_column1 = []
  pred_column2 = []
  cat_test_column1 = []
  cat_test_column2 = []

  n = 0
  for row in predictions:
    pred_column1.append(predictions[n][0])
    pred_column2.append(predictions[n][1])
    cat_test_column1.append(cat_test[n][0])
    cat_test_column2.append(cat_test[n][1])
    n = n+1

  return pred_column1, pred_column2, cat_test_column1, cat_test_column2

### **SVM Fit**

In [11]:
pipe1 = Pipeline([
                  ("select",SelectKBest(chi2,k=30)),
                 ("scale", StandardScaler()),
                 ("classifier", SVC(probability=True, kernel = 'rbf', gamma = 0.1, C = 50)) #radial basis function kernel
                 ])

In [12]:
classifier = LocalClassifierPerParentNode(local_classifier=pipe1)
classifier.fit(vec_train, cat_train)

LocalClassifierPerParentNode(local_classifier=Pipeline(steps=[('select',
                                                               SelectKBest(k=30,
                                                                           score_func=<function chi2 at 0x7fd5670ed0e0>)),
                                                              ('scale',
                                                               StandardScaler()),
                                                              ('classifier',
                                                               SVC(C=50,
                                                                   gamma=0.1,
                                                                   probability=True))]))

**Accuracy**

In [13]:
predictions = classifier.predict(vec_test)
pred_column1, pred_column2, cat_test_column1, cat_test_column2 = return_predicted_categories(predictions)
print('Parent Class Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column1, cat_test_column1)))
print('Subclass Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column2, cat_test_column2)))

Parent Class Prediction accuracy: 77.72%
Subclass Prediction accuracy: 36.64%


**Patient Recognition Rate**

In [14]:
evaluate_recognition_rate(pred_column2)

Summation of patient score:  11.11394550621531
Total Number of Patients:  28
Recognition Rate:  0.39692662522197536


### **Random Forest**

In [15]:
pipe2 = Pipeline([
                 ("select",SelectKBest(chi2,k=20)), 
                 ("scale", StandardScaler()),
                 ("classifier", RandomForestClassifier(class_weight='balanced', n_estimators = 50, random_state = 42))
                 ])

In [16]:
classifier = LocalClassifierPerParentNode(local_classifier=pipe2)
classifier.fit(vec_train, cat_train)

LocalClassifierPerParentNode(local_classifier=Pipeline(steps=[('select',
                                                               SelectKBest(k=20,
                                                                           score_func=<function chi2 at 0x7fd5670ed0e0>)),
                                                              ('scale',
                                                               StandardScaler()),
                                                              ('classifier',
                                                               RandomForestClassifier(class_weight='balanced',
                                                                                      n_estimators=50,
                                                                                      random_state=42))]))

**Accuracy**

In [17]:
predictions = classifier.predict(vec_test)
pred_column1, pred_column2, cat_test_column1, cat_test_column2 = return_predicted_categories(predictions)
print('Parent Class Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column1, cat_test_column1)))
print('Subclass Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column2, cat_test_column2)))

Parent Class Prediction accuracy: 75.84%
Subclass Prediction accuracy: 39.73%


**Patient Recognition Rate**

In [18]:
evaluate_recognition_rate(pred_column2)

Summation of patient score:  11.235048339435018
Total Number of Patients:  28
Recognition Rate:  0.4012517264083935


### **KNN**

In [19]:
pipe3 = Pipeline([
                 ("scale",StandardScaler()),
                 ("classifier", KNeighborsClassifier(n_neighbors=9))
                 ])

In [20]:
classifier = LocalClassifierPerParentNode(local_classifier=pipe3)
classifier.fit(vec_train, cat_train)

LocalClassifierPerParentNode(local_classifier=Pipeline(steps=[('scale',
                                                               StandardScaler()),
                                                              ('classifier',
                                                               KNeighborsClassifier(n_neighbors=9))]))

**Accuracy**

In [22]:
predictions = classifier.predict(vec_test)
pred_column1, pred_column2, cat_test_column1, cat_test_column2 = return_predicted_categories(predictions)
print('Parent Class Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column1, cat_test_column1)))
print('Subclass Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column2, cat_test_column2)))

Parent Class Prediction accuracy: 73.29%
Subclass Prediction accuracy: 37.58%


**Patient Recognition Rate**

In [23]:
evaluate_recognition_rate(pred_column2)

Summation of patient score:  10.754787280697792
Total Number of Patients:  28
Recognition Rate:  0.38409954573920685


### **Adaboost**

In [27]:
pipe4 = Pipeline([
                 ("scale",StandardScaler()),
                 ("classifier", AdaBoostClassifier(n_estimators=200, random_state=42))
                 ])

In [28]:
classifier = LocalClassifierPerParentNode(local_classifier=pipe4)
classifier.fit(vec_train, cat_train)

LocalClassifierPerParentNode(local_classifier=Pipeline(steps=[('scale',
                                                               StandardScaler()),
                                                              ('classifier',
                                                               AdaBoostClassifier(n_estimators=200,
                                                                                  random_state=42))]))

**Accuracy**

In [29]:
predictions = classifier.predict(vec_test)
pred_column1, pred_column2, cat_test_column1, cat_test_column2 = return_predicted_categories(predictions)
print('Parent Class Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column1, cat_test_column1)))
print('Subclass Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column2, cat_test_column2)))

Parent Class Prediction accuracy: 72.35%
Subclass Prediction accuracy: 37.85%


**Patient Recognition Rate**

In [30]:
evaluate_recognition_rate(pred_column2)

Summation of patient score:  11.979490220679903
Total Number of Patients:  28
Recognition Rate:  0.4278389364528537


### **MLP**

In [42]:
mlp = MLPClassifier(alpha=0.1, max_iter=2000)

In [43]:
classifier = LocalClassifierPerParentNode(local_classifier=mlp)
classifier.fit(vec_train, cat_train)

LocalClassifierPerParentNode(local_classifier=MLPClassifier(alpha=0.1,
                                                            max_iter=2000))

**Accuracy**

In [44]:
predictions = classifier.predict(vec_test)
pred_column1, pred_column2, cat_test_column1, cat_test_column2 = return_predicted_categories(predictions)
print('Parent Class Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column1, cat_test_column1)))
print('Subclass Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column2, cat_test_column2)))

Parent Class Prediction accuracy: 70.34%
Subclass Prediction accuracy: 42.82%


**Patient Recognition Rate**

In [45]:
evaluate_recognition_rate(pred_column2)

Summation of patient score:  12.35591373643231
Total Number of Patients:  28
Recognition Rate:  0.4412826334440111
