#CASE STUDY </br>
###Detection Of Dendrites Using Machine Learning Techniques

Installing dependancies

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import os
import glob
import numpy as np
import pandas as pd
import cv2
from tensorflow.keras.preprocessing.image import img_to_array,load_img
from tensorflow.keras.models import Model
from os.path import join
from sklearn.model_selection import train_test_split

Loading the Image Datasets into a DataFrame

In [None]:
dendrite_dir = "/content/gdrive/MyDrive/dataset 2/Dendritic"
all_dendrite_path = [join(dendrite_dir,filename) for filename in os.listdir(dendrite_dir)]

non_dendrite_dir = "/content/gdrive/MyDrive/dataset 2/Non-Dendritic"
non_dendrite_path = [join(non_dendrite_dir,filename) for filename in os.listdir(non_dendrite_dir)]

all_paths = all_dendrite_path + non_dendrite_path

df1 = pd.DataFrame({
    'path': all_paths,
    'is_present': [1 if path in all_dendrite_path else 0 for path in all_paths] })

In [None]:
dendrite_dir = "/content/gdrive/MyDrive/matsc_dataset1/dendrite"
all_dendrite_path = [join(dendrite_dir,filename) for filename in os.listdir(dendrite_dir)]

non_dendrite_dir = "/content/gdrive/MyDrive/matsc_dataset1/non_dendrite"
non_dendrite_path = [join(non_dendrite_dir,filename) for filename in os.listdir(non_dendrite_dir)]

all_paths = all_dendrite_path + non_dendrite_path

df2 = pd.DataFrame({
    'path': all_paths,
    'is_present': [1 if path in all_dendrite_path else 0 for path in all_paths] })

In [None]:
frame = [df1,df2]
df = pd.concat(frame)

In [None]:
df

Unnamed: 0,path,is_present
0,/content/gdrive/MyDrive/dataset 2/Dendritic/20...,1
1,/content/gdrive/MyDrive/dataset 2/Dendritic/20...,1
2,/content/gdrive/MyDrive/dataset 2/Dendritic/11...,1
3,/content/gdrive/MyDrive/dataset 2/Dendritic/8....,1
4,/content/gdrive/MyDrive/dataset 2/Dendritic/5....,1
...,...,...
568,/content/gdrive/MyDrive/matsc_dataset1/non_den...,0
569,/content/gdrive/MyDrive/matsc_dataset1/non_den...,0
570,/content/gdrive/MyDrive/matsc_dataset1/non_den...,0
571,/content/gdrive/MyDrive/matsc_dataset1/non_den...,0


Extracting Features for Image using Pre-trained Models like ResNet50, Inception & EfficientNetB0

In [None]:
features1 = {}
features2 = {}
features3 = {}
l = df["path"].to_list()
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input
model = ResNet50()
model = Model(inputs = model.inputs, outputs = model.layers[-2].output)
c = 0
for i in l:
    img = load_img(i,target_size=(224,224))
    img = img_to_array(img)
    img = img.reshape((1,img.shape[0],img.shape[1],img.shape[2]))
    img = preprocess_input(img)
    feature = model.predict(img, verbose =0)
    features1[c] = feature
    c+=1
from tensorflow.keras.applications.inception_v3 import InceptionV3,preprocess_input
model = InceptionV3()
model = Model(inputs = model.inputs, outputs = model.layers[-2].output)
c = 0
for i in l:
    img = load_img(i,target_size=(299,299))
    img = img_to_array(img)
    img = img.reshape((1,img.shape[0],img.shape[1],img.shape[2]))
    img = preprocess_input(img)
    feature = model.predict(img, verbose =0)
    features2[c] = feature
    c+=1
from tensorflow.keras.applications.efficientnet import EfficientNetB0,preprocess_input
model = EfficientNetB0()
model = Model(inputs = model.inputs, outputs = model.layers[-2].output)
c = 0
for i in l:
    img = load_img(i,target_size=(224,224))
    img = img_to_array(img)
    img = img.reshape((1,img.shape[0],img.shape[1],img.shape[2]))
    img = preprocess_input(img)
    feature = model.predict(img, verbose =0)
    features3[c] = feature
    c+=1
z = []
for i in range(0,c):
    arr1 = features1[i]
    arr2 = features2[i]
    arr3 = features3[i]
    arr = np.hstack((arr1,arr2,arr3))
    z.append(arr)
z = np.vstack(z)

In [None]:
z.shape

(1148, 5376)

Spliting Data Into Train,Test sets

In [None]:
X = pd.DataFrame.from_records(z)
y = df["is_present"]


In [None]:
X.shape

(1148, 5376)

Feature Preprocessing

In [None]:
from sklearn.preprocessing import StandardScaler
def preprocessing(train, test):
  stdSlr = StandardScaler().fit(train)
  X_train = stdSlr.transform(train)
  X_test = stdSlr.transform(test)
  return X_train,X_test

Dimensionality Reduction Of Generated Features

In [None]:
from sklearn.decomposition import PCA
def principal_components(train_data, test_data):
	pca = PCA()
	train = train_data
	train_data = pca.fit(train_data)
	var = pca.explained_variance_ratio_
	sum = 0
	for i in range(len(var)):
		sum += var[i]
		if sum > 0.95:
			break
	pca = PCA(n_components=i)
	train_data = train
	train_pca = pca.fit_transform(train_data)
	test_pca = pca.transform(test_data)
	return train_pca, test_pca

Applying Cross-Validation

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn import svm, ensemble
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import RidgeClassifierCV
from sklearn.neighbors import NearestCentroid
from sklearn.model_selection import GridSearchCV
import numpy as np

val_split = 3


def logreg(train, test, train_labels, test_labels):
	C = [10*i for i in range(-3,2)]
	params={'C': C}
	clf = GridSearchCV(LogisticRegression(class_weight='balanced'),params,cv=val_split)
	return fit_classifier(clf, train, train_labels, test, test_labels)
 
def random_forests(train, test, train_labels, test_labels):
	estimators = [50*i for i in range(4, 6)]
	parameters = {'n_estimators': estimators}
	clf = GridSearchCV(ensemble.RandomForestClassifier(class_weight='balanced'), parameters, cv=val_split)
	return fit_classifier(clf, train, train_labels, test, test_labels)
 
def LDA(train, test, train_labels, test_labels,solver ='svd'):
	parameters = {'solver': [solver]}
	clf = GridSearchCV(LinearDiscriminantAnalysis(), parameters, cv=val_split)
	return fit_classifier(clf, train, train_labels, test, test_labels)

def RidgeCV(train, test, train_labels, test_labels):
  alphas = [0.01, 0.1, 1, 10]
  parameters = {'alphas' : alphas}
  clf = GridSearchCV(RidgeClassifierCV(fit_intercept=True,scoring ='accuracy'),parameters, cv=val_split)
  return fit_classifier(clf, train, train_labels, test, test_labels)
 
def fit_classifier(clf, train, train_labels, test, test_labels):
	clf.fit(train, train_labels)
	pred = clf.predict(test)
	pred = pred.ravel()
	pred = pred.tolist()
	test_labels = test_labels.ravel()
	act_labels = test_labels.tolist()
	acc = accuracy_score(act_labels,pred)
	f1 = f1_score(act_labels,pred,average='weighted')
	prec = precision_score(act_labels,pred,average='weighted')
	rec = recall_score(act_labels,pred,average='weighted')
	conf = confusion_matrix(act_labels,pred)
	return acc, f1, prec, rec, conf, pred

Random Forest

In [None]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5)
a = []
f = []
p = []
r = []
for train_index, test_index in skf.split(X,y):
  X_train, X_test = X.iloc[train_index], X.iloc[test_index]
  y_train, y_test = y.iloc[train_index], y.iloc[test_index]
  X_train, X_test = preprocessing(X_train, X_test)
  X_train, X_test = principal_components(X_train, X_test)
  acc, f1, prec, rec, conf, pred = random_forests(X_train, X_test, y_train, y_test) 
  a.append(acc)
  f.append(f1)
  p.append(prec)
  r.append(rec)
print("Accuracy: " + str(np.mean(a)) + '\n')
print("F1 - score: " + str(np.mean(f)) + '\n')
print("Precision: " + str(np.mean(p)) + '\n')
print("Recall: " + str(np.mean(r)) + '\n')

Accuracy: 0.8355268653882666

F1 - score: 0.8078014888435817

Precision: 0.8861140223822369

Recall: 0.8355268653882666



Logistic Regression

In [None]:
from sklearn.model_selection import StratifiedKFold
from lazypredict.Supervised import LazyClassifier
skf = StratifiedKFold(n_splits=5)
a = []
f = []
p = []
r = []
for train_index, test_index in skf.split(X,y):
  X_train, X_test = X.iloc[train_index], X.iloc[test_index]
  y_train, y_test = y.iloc[train_index], y.iloc[test_index]
  X_train, X_test = preprocessing(X_train, X_test)
  X_train, X_test = principal_components(X_train, X_test)
  acc, f1, prec, rec, conf, pred = logreg(X_train, X_test, y_train, y_test) 
  a.append(acc)
  f.append(f1)
  p.append(prec)
  r.append(rec)
print("Accuracy: " + str(np.mean(a)) + '\n')
print("F1 - score: " + str(np.mean(f)) + '\n')
print("Precision: " + str(np.mean(p)) + '\n')
print("Recall: " + str(np.mean(r)) + '\n')

Accuracy: 0.8720410100626543

F1 - score: 0.8653910596627081

Precision: 0.8989325005643843

Recall: 0.8720410100626543



LDA

In [None]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5)
a = []
f = []
p = []
r = []
for train_index, test_index in skf.split(X,y):
  X_train, X_test = X.iloc[train_index], X.iloc[test_index]
  y_train, y_test = y.iloc[train_index], y.iloc[test_index]
  X_train, X_test = preprocessing(X_train, X_test)
  X_train, X_test = principal_components(X_train, X_test)
  acc, f1, prec, rec, conf, pred = LDA(X_train, X_test, y_train, y_test) 
  a.append(acc)
  f.append(f1)
  p.append(prec)
  r.append(rec)
print("Accuracy: " + str(np.mean(a)) + '\n')
print("F1 - score: " + str(np.mean(f)) + '\n')
print("Precision: " + str(np.mean(p)) + '\n')
print("Recall: " + str(np.mean(r)) + '\n')

Accuracy: 0.8868501993544713

F1 - score: 0.8819052173710704

Precision: 0.9093725776832329

Recall: 0.8868501993544713



RidgeCV

In [None]:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5)
a = []
f = []
p = []
r = []
for train_index, test_index in skf.split(X,y):
  X_train, X_test = X.iloc[train_index], X.iloc[test_index]
  y_train, y_test = y.iloc[train_index], y.iloc[test_index]
  X_train, X_test = preprocessing(X_train, X_test)
  X_train, X_test = principal_components(X_train, X_test)
  acc, f1, prec, rec, conf, pred = RidgeCV(X_train, X_test, y_train, y_test) 
  a.append(acc)
  f.append(f1)
  p.append(prec)
  r.append(rec)
print("Accuracy: " + str(np.mean(a)) + '\n')
print("F1 - score: " + str(np.mean(f)) + '\n')
print("Precision: " + str(np.mean(p)) + '\n')
print("Recall: " + str(np.mean(r)) + '\n')

Accuracy: 0.8885855325612303

F1 - score: 0.884024043076743

Precision: 0.9110843784786123

Recall: 0.8885855325612303

