# **Step-1:** Importing Required Libraries

In [1]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: C:\Python310\python.exe -m pip install --upgrade pip


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import cv2
from keras.models import Model, Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
import os
import seaborn as sns
from keras.applications.resnet import ResNet50
from sklearn.ensemble import VotingClassifier

# **Step-2:** Performing Preprocessing

In [3]:
import os
import glob
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# Set the path to the local dataset directory
dataset_path = "C:/Users/Ashish/Desktop/dataset"

# Capture training data and labels into respective lists
images = []
labels = []

# Iterate through each subdirectory in the dataset path
for directory_path in glob.glob(os.path.join(dataset_path, "*")):
    data_split = os.path.split(directory_path)
    label = data_split[-1]  # Use subdirectory name as label
    print(f"Processing label: {label}")
    
    for img_path in glob.glob(os.path.join(directory_path, "*.jpeg")):
        # Read, preprocess, and normalize the image
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.resize(img, (128, 128))
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        img = np.asarray(img)
        img = img / 126.5  # Normalize the image
        images.append(img)
        labels.append(label)

# Convert lists to NumPy arrays
images = np.array(images)
labels = np.array(labels)

# Split the dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(images, labels, train_size=0.7, random_state=1, shuffle=True)

# Print dataset statistics
print(f"Number of training samples: {len(X_train)}")
print(f"Number of testing samples: {len(X_test)}")


Processing label: AbdomenCT
Processing label: BreastMRI
Processing label: ChestCT
Processing label: CXR
Processing label: Hand
Processing label: HeadCT
Number of training samples: 18914
Number of testing samples: 8106


In [4]:
#Convert lists to arrays
test_images = np.array(X_test)
test_labels = np.array(Y_test)


In [5]:
train_images = np.array(X_train)
train_labels = np.array(Y_train)


In [6]:
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit(test_labels)
test_labels_encoded = le.transform(test_labels)
le.fit(train_labels)
train_labels_encoded = le.transform(train_labels)

In [7]:
#Split data into test and train datasets (already split but assigning to meaningful convention)
x_train, y_train, x_test, y_test = train_images, train_labels_encoded, test_images, test_labels_encoded


In [8]:
y_train[10:20]

array([2, 4, 3, 2, 4, 0, 3, 3, 3, 2])

In [9]:
x_train.shape

(18914, 128, 128, 3)

In [10]:
 # Normalize pixel values to between 0 and 1

In [11]:
labels=np.unique(y_test)
labels

array([0, 1, 2, 3, 4, 5])

In [12]:
#One hot encode y values for neural network.
from tensorflow.keras.utils import to_categorical
y_train_one_hot = to_categorical(y_train)
y_test_one_hot = to_categorical(y_test)

# **Step-3:** Feature Extraction using ResNet50

In [13]:
resnet_model = ResNet50(weights='imagenet', include_top=False, input_shape=(128,128, 3))

In [14]:
#Make loaded layers as non-trainable. This is important as we want to work with pre-trained weights
for layer in resnet_model.layers:
	layer.trainable = False

resnet_model.summary()  #Trainable parameters will be 0

In [15]:
#Now, let us use features from convolutional network for RF
feature_extractor_resnet=resnet_model.predict(x_train)

[1m592/592[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 366ms/step


MemoryError: Unable to allocate 2.31 GiB for an array with shape (18914, 4, 4, 2048) and data type float32

In [None]:
fe_resnet_test=resnet_model.predict(x_test)


In [None]:
features_resnet = feature_extractor_resnet.reshape(feature_extractor_resnet.shape[0], -1)

In [None]:
f_resnet_test= fe_resnet_test.reshape(fe_resnet_test.shape[0],-1)

In [None]:
print(f_resnet_test.shape)

# **Step-4:** Classifications using Machine Learning Models

## **Step-4.1:** Classification using K-Nearest Neighbours

In [None]:
import math
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set_theme(style='whitegrid')

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [None]:
knn = KNeighborsClassifier(n_neighbors=10)

In [None]:
knn.fit(features_resnet, y_train)


In [None]:
y_pred=knn.predict(f_resnet_test)

## **Step-4.2:** Classification using Logistic Regression

In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [None]:
# define the multinomial logistic regression model
lrm = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=500)

In [None]:
from sklearn.decomposition import PCA

# Reduce features to 512 components (adjust as needed based on memory and performance)
pca = PCA(n_components=512)
features_resnet_reduced = pca.fit_transform(features_resnet)
f_resnet_test_reduced = pca.transform(f_resnet_test)

# Check the new shapes
print("Reduced Train Features Shape:", features_resnet_reduced.shape)
print("Reduced Test Features Shape:", f_resnet_test_reduced.shape)

# Fit Logistic Regression
lrm = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=500)
lrm.fit(features_resnet_reduced, y_train)


## **Step-4.3:** Classification using Random Forest

In [None]:
#RANDOM FOREST
from sklearn.ensemble import RandomForestClassifier
RF_model = RandomForestClassifier(n_estimators = 50, random_state = 42)

In [None]:
# Train the model on training data
RF_model.fit(features_resnet, y_train) #For sklearn no one hot encoding

In [None]:
print(RF_model.score(features_resnet, y_train))

In [None]:
#Now predict using the trained RF model.
prediction_RF = RF_model.predict(f_resnet_test)
#Inverse le transform to get original label back.
prediction_RF = le.inverse_transform(prediction_RF)

## **Step-4.5:** Classification using LightGBM


In [None]:
pip install lightgbm

In [None]:
import lightgbm as lgb

In [None]:
lightgbm_classifier =lgb.LGBMClassifier()

In [None]:
lightgbm_classifier.fit(features_resnet, y_train)

In [None]:
predictions = lightgbm_classifier.predict(f_resnet_test)
prediction_lightgbm = le.inverse_transform(predictions)

## **Step-4.6:** Classification using SVC- Support Vector Classifier

In [None]:
from sklearn.svm import SVC

In [None]:
svc = SVC(C=0.65, random_state=0, kernel='rbf')

In [None]:
svc.fit(features_resnet, y_train)

In [None]:
predictions = svc.predict(f_resnet_test)
prediction_SVC = le.inverse_transform(predictions)

## **Step-6:** Classification using Ensemble Model

### Ensemble Model-1


In [None]:
final_model = VotingClassifier(estimators=[('rf', RF_model), ('xgb', xgb_classifier),('knn',knn),('svc',svc),('lr',lrm)], voting='hard')


In [None]:
final_model.fit(features_resnet, y_train)

In [None]:
predictions = final_model.predict(f_resnet_test) # X_test_features
prediction_final = le.inverse_transform(predictions)

In [None]:
print("Accuracy of Model::",metrics.accuracy_score(test_labels, prediction_final))
print("Precision =", precision_score(test_labels, prediction_final,average='weighted'))
print("Recall =",recall_score(test_labels, prediction_final,average='weighted'))
print("F_1 =", f1_score(test_labels, prediction_final,average='weighted'))

In [None]:
print("Accuracy =",accuracy_score(test_labels, prediction_final))
print("Precision =", precision_score(test_labels, prediction_final,average=None))
print("Recall =",recall_score(test_labels, prediction_final,average=None))
print("F_1 =", f1_score(test_labels, prediction_final,average=None))

In [None]:
### Confusion Matrix
cm = confusion_matrix(test_labels, prediction_final)
import seaborn as sns
import matplotlib.pyplot as plt

class_names = labels

# Plot confusion matrix in a beautiful manner
fig = plt.figure(figsize=(7, 7))
ax= plt.subplot()
sns.heatmap(cm, annot=True, ax = ax, cmap="Blues", fmt = "g"); #annot=True to annotate cells
# labels, title and ticks
ax.set_xlabel('Predicted', fontsize=15)
ax.xaxis.set_label_position('bottom')
plt.xticks(rotation=90)
ax.xaxis.set_ticklabels(class_names, fontsize = 15)
ax.xaxis.tick_bottom()

ax.set_ylabel('True', fontsize=15)
ax.yaxis.set_ticklabels(class_names, fontsize = 15)
plt.yticks(rotation=0)

plt.title('Confusion Matrix- Ensemble (Feature Extractor -ResNet50)', fontsize=15)
plt.savefig('/content/drive/MyDrive/NIT Durgapur Internship 3/Output/Resnet50_Ensemble.png')
plt.show()

### Ensemble Model-2

In [None]:
final_model_2 = VotingClassifier(estimators=[('rf', RF_model), ('lgbm', lightgbm_classifier),('knn',knn),('svc',svc),('lr',lrm)], voting='hard')


In [None]:
final_model_2.fit(features_resnet, y_train)

In [None]:
predictions = final_model_2.predict(f_resnet_test) # X_test_features
prediction_final = le.inverse_transform(predictions)