In [None]:
#pip install torch torchvision scikit-learn numpy Pillow

!pip install deap
#!pip install torchsummary


In [None]:
# Mount Google Drive inside the Colab filesystem; the dataset path used
# later in this notebook lives under this mount point.
from google.colab import drive

drive.mount('/content/drive')

In [None]:
# Root folder of the image dataset on the mounted Drive.
import os
dataset = "/content/drive/My Drive/lung_colon_image_set/colon_image_sets/"
# Listing the folder is a quick check that the mount and the path are valid.
print(os.listdir(dataset))


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.regularizers import l2

from PIL import Image
from torchvision import transforms
from PIL import Image

from deap import base, creator, tools, algorithms
import random
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
def build_cnn_extractor():
    """Assemble the convolutional network used as a feature extractor.

    Three Conv2D -> BatchNormalization -> MaxPooling2D stages (64, 128 and
    256 filters) are followed by global average pooling, a 128-unit ReLU
    dense layer and dropout. The first convolution declares a 224x224x3
    input. Nothing in this function compiles, trains or loads weights.

    Returns:
        Sequential: the assembled Keras model.
    """
    stack = []
    for stage, filters in enumerate((64, 128, 256)):
        conv_options = dict(kernel_size=(3,3), activation='relu', padding='same')
        if stage == 0:
            # Only the first layer carries the input shape.
            conv_options['input_shape'] = (224,224,3)
        stack.append(Conv2D(filters, **conv_options))
        stack.append(BatchNormalization())
        stack.append(MaxPooling2D((2,2)))

    # Global average pooling (rather than Flatten) collapses each feature map to one value.
    stack.append(GlobalAveragePooling2D())
    stack.append(Dense(128, activation='relu'))
    stack.append(Dropout(0.4))
    return Sequential(stack)

# Single shared extractor instance used by the feature-extraction step.
cnn_extractor = build_cnn_extractor()


In [None]:
def extract_features(model, dataset_path, batch_size=32):
    """Run every image under ``dataset_path`` through ``model`` and collect the outputs.

    Each sub-directory of ``dataset_path`` is one class. Classes are taken in
    sorted order so the class -> integer label mapping is identical on every
    run (``os.listdir`` order is arbitrary); entries that are not directories
    are ignored. Images are resized to 224x224 and sent to the model in
    batches instead of one ``predict`` call per file.

    Args:
        model: object exposing a Keras-style ``predict(batch, verbose=0)``.
        dataset_path: directory containing one sub-directory per class.
        batch_size: number of images per ``predict`` call (at least 1).

    Returns:
        tuple: ``(features, labels)`` where ``features`` has one flattened
        model output per image and ``labels`` holds the class index of each row.

    Raises:
        ValueError: if no image is found under ``dataset_path``.
    """
    batch_size = max(1, int(batch_size))

    classes = sorted(
        entry for entry in os.listdir(dataset_path)
        if os.path.isdir(os.path.join(dataset_path, entry))
    )
    features_list = []
    labels_list = []

    def _predict_batch(images):
        # Flatten each sample's output to 1-D, as the per-image version did.
        outputs = np.asarray(model.predict(np.stack(images), verbose=0))
        features_list.extend(outputs.reshape(len(images), -1))

    for label, class_dir in enumerate(classes):
        class_path = os.path.join(dataset_path, class_dir)
        pending = []
        # Sorted so the row order (and any seeded split built on it) is reproducible.
        for image_name in sorted(os.listdir(class_path)):
            image_path = os.path.join(class_path, image_name)
            image = tf.keras.preprocessing.image.load_img(image_path, target_size=(224, 224))
            pending.append(tf.keras.preprocessing.image.img_to_array(image))
            labels_list.append(label)
            if len(pending) >= batch_size:
                _predict_batch(pending)
                pending = []
        if pending:
            _predict_batch(pending)

    if not features_list:
        raise ValueError(f'No images found under {dataset_path!r}')

    features = np.vstack(features_list)
    labels = np.array(labels_list)
    return features, labels

In [None]:
# Run the CNN over the whole dataset once; every later step works on these arrays.
features, labels = extract_features(cnn_extractor, dataset)

# Sanity check: both arrays should have the same number of rows.
print(f'Features shape: {features.shape}')
print(f'Labels shape: {labels.shape}')

In [None]:
!pip install pymoo

In [None]:
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.core.problem import Problem
from pymoo.problems import get_problem
from pymoo.operators.crossover.pntx import TwoPointCrossover
from pymoo.operators.mutation.bitflip import BitflipMutation
from pymoo.operators.sampling.rnd import BinaryRandomSampling
from pymoo.optimize import minimize
from pymoo.visualization.scatter import Scatter
import joblib
import multiprocessing



In [None]:
# Accuracy traces filled by FeatureSelectionProblem._evaluate: one entry per
# evaluated (non-empty) feature subset, in evaluation order.
accuracy_svm_list = []
accuracy_dt_list = []
accuracy_knn_list = []

class FeatureSelectionProblem(Problem):
    """Binary feature-selection problem with three objectives.

    Each decision variable switches one feature column on or off. The three
    objectives, all minimised, are the test error (1 - accuracy) of a linear
    SVM, a decision tree and a 3-nearest-neighbour classifier trained on the
    selected columns.
    """

    def __init__(self, features, labels):
        """Store the data and declare one variable per feature column.

        Args:
            features: 2-D array; rows are samples, columns are features.
            labels: class label for every row of ``features``.
        """
        self.features = features
        self.labels = labels
        # NOTE(review): recent pymoo releases name the variable-type argument
        # `vtype`; confirm `type_var` is honoured by the installed version.
        super().__init__(n_var=features.shape[1], n_obj=3, xl=0, xu=1, type_var=np.bool_)

    def _evaluate(self, x, out, *args, **kwargs):
        """Score every feature mask in ``x`` and write the objectives to ``out['F']``.

        Args:
            x: iterable of masks, one per individual; truthy entries mark
                selected feature columns.
            out: dict that receives the objective matrix under key ``'F'``
                (one row per individual, columns: SVM, decision tree, k-NN error).
        """
        res = []
        for individual in x:
            selected_features = np.where(individual)[0]
            if len(selected_features) == 0:
                # Objectives are errors to be minimised, so a mask that selects
                # nothing must get the WORST value (1.0); scoring it 0 would
                # make the empty subset look like a perfect classifier.
                res.append([1.0, 1.0, 1.0])
                continue
            X_train, X_test, y_train, y_test = train_test_split(self.features[:, selected_features], self.labels, test_size=0.3, random_state=42)

            # SVM
            svm = SVC(kernel='linear')
            svm.fit(X_train, y_train)
            y_pred = svm.predict(X_test)
            accuracy_svm = accuracy_score(y_test, y_pred)

            # Decision Tree (seeded so the same mask always gets the same score)
            dt = DecisionTreeClassifier(random_state=42)
            dt.fit(X_train, y_train)
            y_pred = dt.predict(X_test)
            accuracy_dt = accuracy_score(y_test, y_pred)

            # KNN
            knn = KNeighborsClassifier(n_neighbors=3)
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)
            accuracy_knn = accuracy_score(y_test, y_pred)

            # Store accuracy for plots
            accuracy_svm_list.append(accuracy_svm)
            accuracy_dt_list.append(accuracy_dt)
            accuracy_knn_list.append(accuracy_knn)

            res.append([1-accuracy_svm, 1-accuracy_dt, 1-accuracy_knn])

        out['F'] = np.array(res)


In [None]:

# Wrap the extracted features and labels as the optimisation problem handed to NSGA-II.
problem = FeatureSelectionProblem(features, labels)



In [None]:
# Parallel processing setup: a process pool whose `map` is registered on a DEAP toolbox.
# NOTE(review): nothing visible in this notebook passes `toolbox` or `pool` to
# pymoo, so the NSGA-II run appears to evaluate serially — confirm whether
# this setup is still needed or should be wired into the problem.
pool = multiprocessing.Pool()
toolbox = base.Toolbox()
toolbox.register("map", pool.map)


In [None]:
# NSGA-II set up for a binary genome: random bit sampling, two-point
# crossover and bit-flip mutation, with a population of 25 and duplicate
# individuals removed.
algorithm = NSGA2(pop_size=25,
                  sampling=BinaryRandomSampling(),
                  crossover=TwoPointCrossover(),
                  mutation=BitflipMutation(),
                  eliminate_duplicates=True)


In [None]:

# Run NSGA-II for 25 generations with a fixed seed, keeping the per-generation history.
# Duplicate elimination is configured on the NSGA2 object itself, so it is
# not repeated here: it is not an option of `minimize`, which only forwards
# extra keyword arguments to the algorithm's setup.
res = minimize(problem,
               algorithm,
               ('n_gen', 25),
               seed=1,
               save_history=True,
               verbose=True)

In [None]:
# Plot the accuracy of each classifier for every feature subset that was scored.
# The lists grow by one entry per evaluated individual (not per generation),
# so the x-axis is the evaluation index.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(accuracy_svm_list, label="SVM Accuracy")
ax.plot(accuracy_dt_list, label="Decision Tree Accuracy")
ax.plot(accuracy_knn_list, label="KNN Accuracy")
ax.set_xlabel("Evaluated individual (in evaluation order)")
ax.set_ylabel("Accuracy")
ax.set_title("Classifier Accuracy during NSGA-II Optimization")
ax.legend()
plt.show()

In [None]:


# Extract best individuals.
# res.X holds one row per Pareto-optimal feature mask and res.F the matching
# objective values (1 - accuracy for SVM, decision tree and k-NN).
# atleast_2d keeps the indexing valid even if a single solution comes back 1-D.
best_individuals = np.atleast_2d(res.X)
pareto_errors = np.atleast_2d(res.F)
# Choose the mask with the lowest mean error across the three classifiers
# rather than whichever Pareto member happens to be listed first.
best_individual = best_individuals[int(np.argmin(pareto_errors.mean(axis=1)))]
selected_features = np.where(best_individual)[0]
print(f'Selected Features: {selected_features}')

In [None]:
# Select features based on NSGA-II results: keep only the chosen columns.
X_selected = features[:, selected_features]

# Split data into 70% train / 30% test with a fixed seed.
# NOTE(review): FeatureSelectionProblem._evaluate splits with the same
# test_size and random_state, so these test rows look like the ones the
# feature mask was tuned on; scores computed on them may be optimistic —
# consider a separate hold-out set.
X_train, X_test, y_train, y_test = train_test_split(X_selected, labels, test_size=0.3, random_state=42)



In [None]:

# Assuming X_train, X_test, y_train, y_test are already defined

# Function to plot ROC curve
def plot_roc_curve(fpr, tpr, roc_auc, title):
    """Draw one ROC curve with the chance diagonal and display it.

    Args:
        fpr: false-positive rates along the curve.
        tpr: true-positive rates along the curve.
        roc_auc: area under the curve, shown in the legend with two decimals.
        title: figure title.
    """
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
    # Dashed diagonal: the reference line from (0, 0) to (1, 1).
    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    ax.set(
        xlim=[0.0, 1.0],
        ylim=[0.0, 1.05],
        xlabel='False Positive Rate',
        ylabel='True Positive Rate',
        title=title,
    )
    ax.legend(loc="lower right")
    plt.show()

def evaluate_classifier(name, clf, X_train, X_test, y_train, y_test):
    """Fit ``clf`` and report its test metrics, ROC curve and confusion matrix.

    Prints accuracy, precision, recall, F1 and AUC, then shows the ROC curve
    and the confusion matrix. The ROC score is the estimator's
    ``decision_function`` when it has one, otherwise the predicted probability
    of class 1.

    NOTE(review): precision/recall/F1 use scikit-learn's default averaging and
    the score uses column 1 of ``predict_proba``, so this presumably expects
    binary labels 0/1 — confirm against the dataset.

    Args:
        name: label used in printed lines and plot titles.
        clf: unfitted scikit-learn classifier.
        X_train, X_test: feature matrices for fitting and testing.
        y_train, y_test: matching label vectors.

    Returns:
        dict: keys ``accuracy``, ``precision``, ``recall``, ``f1``, ``auc``,
        ``y_pred`` and ``y_score``.
    """
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    if hasattr(clf, 'decision_function'):
        y_score = clf.decision_function(X_test)
    else:
        y_score = clf.predict_proba(X_test)[:, 1]

    fpr, tpr, _ = roc_curve(y_test, y_score)
    report = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
        'recall': recall_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'auc': auc(fpr, tpr),
        'y_pred': y_pred,
        'y_score': y_score,
    }

    print(f'{name} Accuracy: {report["accuracy"]:.4f}')
    print(f'{name} Precision: {report["precision"]:.4f}')
    print(f'{name} Recall: {report["recall"]:.4f}')
    print(f'{name} F1 Score: {report["f1"]:.4f}')
    print(f'{name} AUC: {report["auc"]:.4f}')

    plot_roc_curve(fpr, tpr, report['auc'], f'{name} ROC Curve')

    ConfusionMatrixDisplay(confusion_matrix=confusion_matrix(y_test, y_pred)).plot()
    plt.title(f'{name} Confusion Matrix')
    plt.show()
    return report


# Classifiers evaluated on the NSGA-II-selected features.
svm = SVC(kernel='linear', probability=True)
# Seeded so repeated runs of this cell give the same tree.
dt = DecisionTreeClassifier(random_state=42)
# n_neighbors=3 matches the k-NN used as an objective during feature
# selection, so the reported score is for the model the features were tuned for.
knn = KNeighborsClassifier(n_neighbors=3)

# Metrics of every classifier, keyed by display name.
results = {}
for clf_name, clf in (('SVM', svm), ('Decision Tree', dt), ('k-NN', knn)):
    results[clf_name] = evaluate_classifier(clf_name, clf, X_train, X_test, y_train, y_test)


In [None]:

# Release the worker processes of the multiprocessing pool:
# close() stops the pool from accepting new tasks, join() waits for the workers to exit.
pool.close()
pool.join()