In [1]:
!pip install hiclass

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting hiclass
  Downloading hiclass-4.0.11-py3-none-any.whl (23 kB)
Collecting ray
  Downloading ray-1.13.0-cp37-cp37m-manylinux2014_x86_64.whl (54.5 MB)
[K     |████████████████████████████████| 54.5 MB 97 kB/s 
Collecting virtualenv
  Downloading virtualenv-20.14.1-py2.py3-none-any.whl (8.8 MB)
[K     |████████████████████████████████| 8.8 MB 39.3 MB/s 
Collecting grpcio<=1.43.0,>=1.28.1
  Downloading grpcio-1.43.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)
[K     |████████████████████████████████| 4.1 MB 8.5 MB/s 
Collecting aiosignal
  Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)
Collecting frozenlist
  Downloading frozenlist-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)
[K     |████████████████████████████████| 144 kB 33.5 MB/s 
Collecting distlib<1,>=0.3.1
  Downloading distli

In [2]:
!pip install mahotas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mahotas
  Downloading mahotas-1.4.12-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (5.7 MB)
[K     |████████████████████████████████| 5.7 MB 8.6 MB/s 
Installing collected packages: mahotas
Successfully installed mahotas-1.4.12


In [3]:
import mahotas
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
from PIL import Image, ImageOps
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn import metrics
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from hiclass import LocalClassifierPerNode
from hiclass import LocalClassifierPerParentNode
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectKBest, chi2

In [4]:
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


### **Load Images and Extract TAS Descriptors**

In [5]:
image_dir = "/gdrive/My Drive/Project/Denoised_CLAHE_Cl3"

In [6]:
def load_data(tag='train'):
  tag_dir = os.path.join(image_dir, tag)
  tag_path = Path(tag_dir)
  data = [] # Images
  cat = [] # Category
  subcat = []
  patient = []
  file_name = []
  for mag_dir in tag_path.iterdir():  
    mag_label = mag_dir.stem
    print("*",mag_label)
    if mag_label == "200X":
      print("Got it!")
      for img_name in mag_dir.glob('*.png'):
        img_label = img_name.stem
        splitted_image_name = img_label.split('_')
        cat_label = splitted_image_name[1]
        remaining_part = splitted_image_name[2].split('-')
        subcat_label = remaining_part[0]
        patient_label = remaining_part[2]
        img = mahotas.imread(img_name.as_posix())
        img = img[:, :, 0]
        feature = mahotas.features.pftas(img)
        hi_label = [cat_label, subcat_label]
        data.append(feature) # append the feature to the data
        cat.append(hi_label) # append the label to the category
        subcat.append(subcat_label)
        patient.append(patient_label)
        file_name.append(img_label)
  return data, cat, subcat, patient, file_name

In [7]:
# train images
vec_train, cat_train, subcat_train, patient_train, image_name_train = load_data('train')
# test images
vec_test, cat_test, subcat_test, patient_test, image_name_test = load_data('test')

* 40X
* 100X
* 200X
Got it!
* 400X
* 40X
* 100X
* 200X
Got it!
* 400X


### **Function to Compute Patient Recognition Rate**

In [8]:
def evaluate_recognition_rate(prediction):
  unique_patient_test = list(np.unique(np.array(patient_test)))
  num_images_per_patient = []
  num_correctly_classified = []
  num = 0
  total_patient_score = 0
  num_patient = 0
  for patient in unique_patient_test:
    num_patient = num_patient + 1
    Np = 0
    Nrec = 0
    image_index_counter = -1
    for image_name in image_name_test:
      image_index_counter = image_index_counter + 1
      image_name_splitted = image_name.split('-')
      if patient == image_name_splitted[2]:
        first_part_splitted = image_name_splitted[0].split('_')
        image_class = first_part_splitted[2]
        if image_class == prediction[image_index_counter]:
          Nrec = Nrec + 1
        Np = Np + 1
    patient_score = Nrec/Np
    #print(patient_score)
    num_images_per_patient.append(Np)
    num_correctly_classified.append(Nrec)
    total_patient_score = total_patient_score + patient_score
    num = num + Np    
  recognition_rate = total_patient_score/num_patient
  print("Summation of patient score: ", total_patient_score)
  print("Total Number of Patients: ", num_patient)
  print("Recognition Rate: ", recognition_rate)

### **Function to Identify Predicted Classes and Subclasses**

In [9]:
def return_predicted_categories(predictions):
  pred_column1 = []
  pred_column2 = []
  cat_test_column1 = []
  cat_test_column2 = []

  n = 0
  for row in predictions:
    pred_column1.append(predictions[n][0])
    pred_column2.append(predictions[n][1])
    cat_test_column1.append(cat_test[n][0])
    cat_test_column2.append(cat_test[n][1])
    n = n+1

  return pred_column1, pred_column2, cat_test_column1, cat_test_column2

### **SVM Fit**

In [10]:
pipe1 = Pipeline([
                 ("select",SelectKBest(chi2,k=30)),
                 ("scale", StandardScaler()),
                 ("classifier", SVC(class_weight='balanced', probability=True, kernel = 'rbf', gamma = 0.01, C = 100)) #radial basis function kernel
                 ])

In [11]:
classifier = LocalClassifierPerParentNode(local_classifier=pipe1)
classifier.fit(vec_train, cat_train)

LocalClassifierPerParentNode(local_classifier=Pipeline(steps=[('select',
                                                               SelectKBest(k=30,
                                                                           score_func=<function chi2 at 0x7fc1695327a0>)),
                                                              ('scale',
                                                               StandardScaler()),
                                                              ('classifier',
                                                               SVC(C=100,
                                                                   class_weight='balanced',
                                                                   gamma=0.01,
                                                                   probability=True))]))

**Accuracy**

In [12]:
predictions = classifier.predict(vec_test)
pred_column1, pred_column2, cat_test_column1, cat_test_column2 = return_predicted_categories(predictions)
print('Parent Class Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column1, cat_test_column1)))
print('Subclass Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column2, cat_test_column2)))

Parent Class Prediction accuracy: 78.09%
Subclass Prediction accuracy: 36.69%


**Patient Recognition Rate**

In [13]:
evaluate_recognition_rate(pred_column2)

Summation of patient score:  11.086449485460903
Total Number of Patients:  28
Recognition Rate:  0.3959446244807466


### **Random Forest**

In [14]:
pipe2 = Pipeline([
                 ("select",SelectKBest(chi2,k=30)),
                 ("scale", StandardScaler()),
                 ("classifier", RandomForestClassifier(class_weight='balanced', n_estimators = 50, random_state = 42))
                 ])

In [15]:
classifier = LocalClassifierPerParentNode(local_classifier=pipe2)
classifier.fit(vec_train, cat_train)

LocalClassifierPerParentNode(local_classifier=Pipeline(steps=[('select',
                                                               SelectKBest(k=30,
                                                                           score_func=<function chi2 at 0x7fc1695327a0>)),
                                                              ('scale',
                                                               StandardScaler()),
                                                              ('classifier',
                                                               RandomForestClassifier(class_weight='balanced',
                                                                                      n_estimators=50,
                                                                                      random_state=42))]))

**Accuracy**

In [16]:
predictions = classifier.predict(vec_test)
pred_column1, pred_column2, cat_test_column1, cat_test_column2 = return_predicted_categories(predictions)
print('Parent Class Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column1, cat_test_column1)))
print('Subclass Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column2, cat_test_column2)))

Parent Class Prediction accuracy: 77.42%
Subclass Prediction accuracy: 41.13%


**Patient Recognition Rate**

In [17]:
evaluate_recognition_rate(pred_column2)

Summation of patient score:  12.175848544686085
Total Number of Patients:  28
Recognition Rate:  0.43485173373878877


### **KNN**

In [18]:
pipe3 = Pipeline([
                 ("select",SelectKBest(chi2,k=30)), 
                 ("scale",StandardScaler()),
                 ("classifier", KNeighborsClassifier(n_neighbors=7))
                 ])

In [19]:
classifier = LocalClassifierPerParentNode(local_classifier=pipe3)
classifier.fit(vec_train, cat_train)

LocalClassifierPerParentNode(local_classifier=Pipeline(steps=[('select',
                                                               SelectKBest(k=30,
                                                                           score_func=<function chi2 at 0x7fc1695327a0>)),
                                                              ('scale',
                                                               StandardScaler()),
                                                              ('classifier',
                                                               KNeighborsClassifier(n_neighbors=7))]))

**Accuracy**

In [20]:
predictions = classifier.predict(vec_test)
pred_column1, pred_column2, cat_test_column1, cat_test_column2 = return_predicted_categories(predictions)
print('Parent Class Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column1, cat_test_column1)))
print('Subclass Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column2, cat_test_column2)))

Parent Class Prediction accuracy: 74.33%
Subclass Prediction accuracy: 40.32%


**Patient Recognition Rate**

In [21]:
evaluate_recognition_rate(pred_column2)

Summation of patient score:  12.080195251122241
Total Number of Patients:  28
Recognition Rate:  0.43143554468293716


### **Adaboost**

In [25]:
pipe4 = Pipeline([
                 ("classifier", AdaBoostClassifier(n_estimators=200, random_state=42))
                 ])

In [26]:
classifier = LocalClassifierPerParentNode(local_classifier=pipe4)
classifier.fit(vec_train, cat_train)

LocalClassifierPerParentNode(local_classifier=Pipeline(steps=[('classifier',
                                                               AdaBoostClassifier(n_estimators=200,
                                                                                  random_state=42))]))

**Accuracy**

In [28]:
predictions = classifier.predict(vec_test)
pred_column1, pred_column2, cat_test_column1, cat_test_column2 = return_predicted_categories(predictions)
print('Parent Class Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column1, cat_test_column1)))
print('Subclass Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column2, cat_test_column2)))

Parent Class Prediction accuracy: 71.91%
Subclass Prediction accuracy: 32.12%


**Patient Recognition Rate**

In [29]:
evaluate_recognition_rate(pred_column2)

Summation of patient score:  9.548053488862644
Total Number of Patients:  28
Recognition Rate:  0.341001910316523


### **MLP**

In [30]:
mlp = MLPClassifier(alpha=0.1, max_iter=2000)

In [31]:
classifier = LocalClassifierPerParentNode(local_classifier=mlp)
classifier.fit(vec_train, cat_train)

LocalClassifierPerParentNode(local_classifier=MLPClassifier(alpha=0.1,
                                                            max_iter=2000))

**Accuracy**

In [32]:
predictions = classifier.predict(vec_test)
pred_column1, pred_column2, cat_test_column1, cat_test_column2 = return_predicted_categories(predictions)
print('Parent Class Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column1, cat_test_column1)))
print('Subclass Prediction accuracy: {:.2%}'.format(accuracy_score(pred_column2, cat_test_column2)))

Parent Class Prediction accuracy: 73.12%
Subclass Prediction accuracy: 41.40%


**Patient Recognition Rate**

In [33]:
evaluate_recognition_rate(pred_column2)

Summation of patient score:  12.808908689429863
Total Number of Patients:  28
Recognition Rate:  0.4574610246224951
