In [1]:
!pip install mahotas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mahotas
  Downloading mahotas-1.4.12-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (5.7 MB)
[K     |████████████████████████████████| 5.7 MB 4.6 MB/s 
Installing collected packages: mahotas
Successfully installed mahotas-1.4.12


In [45]:
import mahotas
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path
from PIL import Image, ImageOps
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
import xgboost as xgb
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, auc, roc_curve, roc_auc_score, recall_score, confusion_matrix
from sklearn.ensemble import VotingClassifier
from sklearn.multiclass import OneVsRestClassifier
from sklearn.multiclass import OneVsOneClassifier
from sklearn.multiclass import OutputCodeClassifier

In [3]:
# Mount Google Drive at /gdrive; the image folder used below lives under
# "/gdrive/My Drive/...". force_remount=True is passed so the mount is redone
# even when the cell is re-run.
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


### **Load Images and Extract PFTAS (Parameter-Free Threshold Adjacency Statistics) Descriptors**

In [4]:
# Root folder of the image set. load_data() joins this with a split name
# ('train' / 'test') and then looks for magnification sub-folders inside it.
image_dir = "/gdrive/My Drive/Project/Denoised_CLAHE_Cl3"

In [5]:
def load_data(tag='train', magnification='40X'):
  """Load one split of the image set and compute a PFTAS descriptor per image.

  Walks ``<image_dir>/<tag>/`` (``image_dir`` is a notebook-level variable),
  prints the name of every entry found there, and processes every ``*.png``
  file inside the entry whose name equals ``magnification``.

  Labels are parsed from the file stem, which is expected to have the form
  ``<prefix>_<category>_<subcategory>-<field>-<patient>-...``.

  Parameters
  ----------
  tag : str
      Name of the split folder under ``image_dir`` (e.g. 'train' or 'test').
  magnification : str
      Name of the magnification folder to read. Defaults to '40X', which is
      the only folder the function read before this parameter existed.

  Returns
  -------
  tuple of five parallel lists
      ``(features, categories, subcategories, patients, file_stems)`` with one
      entry per image, all in the same order.
  """
  tag_path = Path(os.path.join(image_dir, tag))
  data = []       # PFTAS feature vector of each image
  cat = []        # category token parsed from the file name
  subcat = []     # sub-category token parsed from the file name
  patient = []    # patient token parsed from the file name
  file_name = []  # file stem, kept so predictions can be mapped back to images
  # NOTE: iterdir()/glob() yield entries in filesystem order, which is not
  # guaranteed to be sorted; the five lists are still mutually aligned.
  for mag_dir in tag_path.iterdir():
    mag_label = mag_dir.stem
    print("*",mag_label)
    if mag_label != magnification:
      continue
    print("Got it!")
    for img_name in mag_dir.glob('*.png'):
      img_label = img_name.stem
      name_parts = img_label.split('_')
      cat_label = name_parts[1]
      id_parts = name_parts[2].split('-')
      subcat_label = id_parts[0]
      patient_label = id_parts[2]
      img = mahotas.imread(img_name.as_posix())
      # Multi-channel images: keep only the first channel. A 2-D (grayscale)
      # image has no channel axis and is used unchanged instead of raising
      # an IndexError.
      if img.ndim == 3:
        img = img[:, :, 0]
      feature = mahotas.features.pftas(img)
      data.append(feature)
      cat.append(cat_label)
      subcat.append(subcat_label)
      patient.append(patient_label)
      file_name.append(img_label)
  return data, cat, subcat, patient, file_name

In [6]:
# Training split: PFTAS feature vectors plus the parallel category,
# sub-category, patient and file-name lists returned by load_data()
vec_train, cat_train, subcat_train, patient_train, image_name_train = load_data('train')
# Test split, same five parallel lists
vec_test, cat_test, subcat_test, patient_test, image_name_test = load_data('test')

* 40X
Got it!
* 100X
* 200X
* 400X
* 40X
Got it!
* 100X
* 200X
* 400X


### **List the Sub-Categories**

In [7]:
# Distinct sub-category codes present in the training split, in the sorted
# order np.unique returns them
labels = list(np.unique(subcat_train))
labels

['A', 'DC', 'F', 'LC', 'MC', 'PC', 'PT', 'TA']

In [8]:
# Distinct sub-category codes present in the test split.
# NOTE: this rebinds `labels`, replacing the list built from the training split.
labels = list(np.unique(subcat_test))
labels

['A', 'DC', 'F', 'LC', 'MC', 'PC', 'PT', 'TA']

### **Label Encoding**

In [9]:
# Fit the encoder on the training sub-categories only and reuse the same
# mapping for the test split, so integer codes mean the same class in both.
# `le` is also used by evaluate_recognition_rate() to turn predicted codes
# back into sub-category names.
le = LabelEncoder()
label_train = le.fit_transform(subcat_train)
label_test = le.transform(subcat_test)

In [10]:
# Inspect the integer-encoded training labels produced by `le`
label_train

array([1, 1, 1, ..., 1, 1, 1])

In [None]:
# Inspect the integer-encoded test labels produced by `le`
label_test

### **Function to Compute Patient Recognition Rate**

In [12]:
def evaluate_recognition_rate(prediction):
  """Print the patient-level recognition rate for a set of test predictions.

  For each patient the patient score is ``Nrec / Np``, where ``Np`` is the
  number of test images belonging to that patient and ``Nrec`` is how many of
  those images were predicted with the sub-category encoded in their file
  name. The recognition rate is the sum of patient scores divided by the
  number of patients.

  Reads the notebook-level names ``le`` (fitted LabelEncoder),
  ``patient_test`` and ``image_name_test``. Image names are expected to look
  like ``<prefix>_<category>_<subcategory>-<field>-<patient>-...``.

  Parameters
  ----------
  prediction : array-like
      Encoded labels, one per entry of ``image_name_test`` and in the same
      order.

  Returns
  -------
  None
      The summed patient score, the number of patients and the recognition
      rate are printed.

  Raises
  ------
  ValueError
      If ``prediction`` and ``image_name_test`` differ in length, if there
      are no test patients, or if a patient has no test image.
  """
  inverted_prediction = le.inverse_transform(prediction)
  if len(inverted_prediction) != len(image_name_test):
    raise ValueError(
        "prediction has %d entries but there are %d test images"
        % (len(inverted_prediction), len(image_name_test)))

  # Single pass over the images: count images and correct predictions per
  # patient instead of rescanning every image once per patient.
  images_per_patient = {}
  correct_per_patient = {}
  for image_name, predicted_class in zip(image_name_test, inverted_prediction):
    name_parts = image_name.split('-')
    patient = name_parts[2]
    image_class = name_parts[0].split('_')[2]
    images_per_patient[patient] = images_per_patient.get(patient, 0) + 1
    if image_class == predicted_class:
      correct_per_patient[patient] = correct_per_patient.get(patient, 0) + 1

  # Sorted so the scores are accumulated in a fixed, repeatable order.
  unique_patient_test = sorted(set(patient_test))
  num_patient = len(unique_patient_test)
  if num_patient == 0:
    raise ValueError("patient_test is empty; there is nothing to evaluate")

  total_patient_score = 0
  for patient in unique_patient_test:
    Np = images_per_patient.get(patient, 0)
    if Np == 0:
      raise ValueError("no test image found for patient %r" % (patient,))
    Nrec = correct_per_patient.get(patient, 0)
    total_patient_score = total_patient_score + Nrec/Np

  recognition_rate = total_patient_score/num_patient
  print("Summation of patient score: ", total_patient_score)
  print("Total Number of Patients: ", num_patient)
  print("Recognition Rate: ", recognition_rate)
### **One Vs Rest Classifier**

In [75]:
# One-vs-rest wrapper around a linear SVM, fitted on the training features
# (fit() returns the fitted estimator itself)
model = OneVsRestClassifier(LinearSVC(random_state=0)).fit(vec_train, label_train)

# Predicted (encoded) sub-category for every test image
prediction = model.predict(vec_test)

# Image-level accuracy on the test split
print(f"Test Set Accuracy : {accuracy_score(label_test, prediction) * 100}%\n")

# Patient-level recognition rate for the same predictions
evaluate_recognition_rate(prediction)

Test Set Accuracy : 44.29530201342282%

Summation of patient score:  12.802688435041377
Total Number of Patients:  28
Recognition Rate:  0.4572388726800492


### **One Vs One Classifier**

In [77]:
# One-vs-one wrapper around a polynomial-kernel SVM.
# probability=True is intentionally not set: OneVsOneClassifier votes with each
# pairwise SVC's decision_function, so probability estimates are never used
# here, and enabling them only adds an internal cross-validated calibration to
# every pairwise fit.
model = OneVsOneClassifier(SVC(kernel='poly', gamma=2, C=1))
model.fit(vec_train, label_train)

# Making a prediction on the test set
prediction = model.predict(vec_test)

# Evaluating the model (image-level accuracy)
print(f"Test Set Accuracy : {accuracy_score(label_test, prediction) * 100}%\n")

# Patient recognition rate
evaluate_recognition_rate(prediction)

Test Set Accuracy : 44.0268456375839%

Summation of patient score:  12.54752293016999
Total Number of Patients:  28
Recognition Rate:  0.4481258189346425


### **Output Code Classifier**

In [78]:
# Output-code wrapper around a linear SVM, fitted on the training features
# (fit() returns the fitted estimator itself)
model = OutputCodeClassifier(
    LinearSVC(random_state=0), code_size=2, random_state=0
).fit(vec_train, label_train)

# Predicted (encoded) sub-category for every test image
prediction = model.predict(vec_test)

# Image-level accuracy on the test split
print(f"Test Set Accuracy : {accuracy_score(label_test, prediction) * 100}%\n")

# Patient-level recognition rate for the same predictions
evaluate_recognition_rate(prediction)

Test Set Accuracy : 44.29530201342282%

Summation of patient score:  12.888913925237455
Total Number of Patients:  28
Recognition Rate:  0.46031835447276626
