In [None]:
# Lung Disease Classification using Custom Models and CatBoost

# This notebook demonstrates how to classify lung diseases using custom deep learning models and the CatBoost classifier. The pipeline loads the images, extracts features with a Hierarchical GCN (HGCN) and DeepenShuffleNet, and finally performs the classification.

## Setup

# First, import all necessary libraries.


In [None]:
import os
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, GlobalAveragePooling2D, Conv2D
from tensorflow.keras.applications import ShuffleNetV2
from spektral.layers import GraphConv
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, cohen_kappa_score, matthews_corrcoef

In [None]:
def HGCN(input_shape, num_classes):
    """Build the graph-convolution classifier used as the first feature extractor.

    The network applies two 64-unit ReLU ``GraphConv`` layers, flattens the
    result, passes it through a 128-unit ReLU dense layer and ends in a
    ``num_classes``-way softmax.

    Args:
        input_shape: Shape (without the batch axis) handed to ``Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model`` mapping the input tensor to the softmax
        output.
    """
    node_input = Input(shape=input_shape)

    # NOTE(review): the second element of each GraphConv input list is the
    # input tensor itself. Graph layers normally expect an adjacency matrix in
    # that position -- confirm this is intended.
    hidden = node_input
    for _ in range(2):
        hidden = GraphConv(64, activation='relu')([hidden, node_input])

    flattened = Flatten()(hidden)
    embedding = Dense(128, activation='relu')(flattened)
    class_probs = Dense(num_classes, activation='softmax')(embedding)
    return Model(inputs=node_input, outputs=class_probs)

def DeepenShuffleNet(input_shape, num_classes):
    """Build a ShuffleNetV2-based classifier used as the second feature extractor.

    A ``ShuffleNetV2`` backbone (created without its top and with
    ``weights='imagenet'``) is followed by global average pooling, a 1024-unit
    ReLU dense layer and a ``num_classes``-way softmax.

    Args:
        input_shape: Image shape forwarded to the backbone's ``input_shape``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model`` from the backbone input to the softmax
        output.
    """
    # NOTE(review): confirm that the installed tensorflow.keras.applications
    # actually exports ShuffleNetV2; the import at the top of the notebook
    # fails otherwise.
    backbone = ShuffleNetV2(include_top=False, input_shape=input_shape, weights='imagenet')

    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(1024, activation='relu')(pooled)
    class_probs = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=backbone.input, outputs=class_probs)


In [None]:
def load_images_from_folder(folder, target_size=(128, 128), extensions=(".png",)):
    """Load every matching image in ``folder`` and label it with the folder name.

    Args:
        folder: Directory that directly contains the image files.
        target_size: ``(height, width)`` each image is resized to by
            ``load_img``. Extra trailing entries (e.g. a channel count) are
            ignored.
        extensions: File suffix, or tuple of suffixes, to accept. Matching is
            case-insensitive.

    Returns:
        A ``(images, labels)`` tuple. ``images`` is ``np.array`` of the
        ``img_to_array`` results, one entry per loaded file (an empty array
        when nothing matched). ``labels`` is a list holding the last path
        component of ``folder`` once per loaded image.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder`` cannot be read.
    """
    folder = os.fspath(folder)

    # Normalise before taking the basename so a trailing separator, or the
    # native separator inserted by os.path.join, still yields the directory
    # name rather than '' or a longer path fragment.
    label = os.path.basename(os.path.normpath(folder))

    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # load_img expects (height, width) only.
    size = tuple(target_size[:2])

    images = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(suffixes):
            continue
        img = load_img(os.path.join(folder, filename), target_size=size)
        images.append(img_to_array(img))

    return np.array(images), [label] * len(images)


In [None]:
def feature_extraction(images, model_func, num_classes):
    """Run ``images`` through a freshly built model and return its predictions.

    Args:
        images: Indexable batch of samples; the shape of the first sample is
            used as the model's input shape.
        model_func: Callable ``(input_shape, num_classes) -> model`` whose
            result exposes ``predict``.
        num_classes: Forwarded unchanged to ``model_func``.

    Returns:
        Whatever ``model.predict(images)`` returns.
    """
    # The model is created on every call and is not fitted here.
    sample_shape = images[0].shape
    extractor = model_func(sample_shape, num_classes)
    return extractor.predict(images)


In [None]:
def calculate_metrics(y_true, y_pred):
    """Compute summary classification metrics for binary or multiclass labels.

    With exactly two distinct labels the rate metrics describe the second
    label in sorted order (the "positive" class), which matches unpacking a
    2x2 confusion matrix as ``tn, fp, fn, tp``. With any other number of
    labels each rate is computed per class (one-vs-rest) and macro-averaged.
    Ratios whose denominator is zero are reported as 0.

    Args:
        y_true: Ground-truth labels; flattened to 1-D before use.
        y_pred: Predicted labels; flattened to 1-D before use, so a column
            vector of shape ``(n, 1)`` is accepted.

    Returns:
        dict with keys "Recall", "False Positive Rate", "Precision",
        "F1 Score", "Cohen's Kappa" and "Matthews Correlation Coefficient".
    """
    def _safe_ratio(numerator, denominator):
        # Element-wise division that yields 0 wherever the denominator is 0.
        numerator = np.asarray(numerator, dtype=float)
        denominator = np.asarray(denominator, dtype=float)
        return np.divide(numerator, denominator,
                         out=np.zeros_like(numerator), where=denominator != 0)

    y_true = np.ravel(y_true)
    y_pred = np.ravel(y_pred)

    # Fix the label order explicitly so matrix indices and pos_label agree.
    labels = np.unique(np.concatenate([y_true, y_pred]))
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # One-vs-rest counts for every class, derived from the full matrix.
    tp = np.diag(cm)
    fn = cm.sum(axis=1) - tp
    fp = cm.sum(axis=0) - tp
    tn = cm.sum() - tp - fn - fp

    recall_per_class = _safe_ratio(tp, tp + fn)
    fpr_per_class = _safe_ratio(fp, fp + tn)
    precision_per_class = _safe_ratio(tp, tp + fp)

    if len(labels) == 2:
        # Binary case: report the positive (second) class only.
        recall = float(recall_per_class[1])
        fpr = float(fpr_per_class[1])
        precision = float(precision_per_class[1])
        f1 = f1_score(y_true, y_pred, pos_label=labels[1], zero_division=0)
    else:
        # The default average='binary' raises on multiclass targets.
        recall = float(recall_per_class.mean())
        fpr = float(fpr_per_class.mean())
        precision = float(precision_per_class.mean())
        f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

    kappa = cohen_kappa_score(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)
    return {
        "Recall": recall,
        "False Positive Rate": fpr,
        "Precision": precision,
        "F1 Score": f1,
        "Cohen's Kappa": kappa,
        "Matthews Correlation Coefficient": mcc
    }


In [None]:
def main(augmented_path='c://sampledata//augmented'):
    """Run the full pipeline: load images, extract features, train and evaluate.

    Images are read from one sub-folder per disease under ``augmented_path``.
    All images are then passed together through one HGCN extractor and one
    DeepenShuffleNet extractor, the two outputs are concatenated per sample,
    and a CatBoost classifier is trained and evaluated on an 80/20 split.

    Args:
        augmented_path: Root directory containing one sub-folder per disease
            name.

    Returns:
        The metrics dict produced by ``calculate_metrics`` (also printed).

    Raises:
        RuntimeError: If no images were found in any disease folder.
    """
    diseases = ['Healthy', 'COPD', 'Asthma', 'Pneumonia', 'URTI', 'Bronchiectasis', 'Bronchiolitis', 'LRTI']

    image_batches = []
    all_labels = []
    for disease in diseases:
        folder_path = os.path.join(augmented_path, disease)
        images, _ = load_images_from_folder(folder_path)
        if len(images) == 0:
            # An empty batch cannot be used to infer the model input shape.
            print(f'Skipping {disease}: no images found in {folder_path}')
            continue
        image_batches.append(images)
        # Use the known class name instead of a label parsed from the path.
        all_labels.extend([disease] * len(images))

    if not image_batches:
        raise RuntimeError(f'No images found under {augmented_path}')

    all_images = np.concatenate(image_batches, axis=0)
    all_labels = np.array(all_labels)

    # feature_extraction builds a new model on every call. Calling it once
    # over the whole data set makes all classes share the same weights;
    # calling it per class would make their feature vectors incomparable.
    # NOTE(review): neither extractor is fitted anywhere in this pipeline --
    # confirm that using them without training is intended.
    hgcn_features = feature_extraction(all_images, HGCN, len(diseases))
    deep_shuffle_net_features = feature_extraction(all_images, DeepenShuffleNet, len(diseases))
    all_features = np.concatenate((hgcn_features, deep_shuffle_net_features), axis=1)

    X_train, X_test, y_train, y_test = train_test_split(all_features, all_labels, test_size=0.2, random_state=42)
    catboost_model = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=3, verbose=False)
    catboost_model.fit(X_train, y_train)

    # Flatten in case predictions come back as a column vector.
    y_pred = np.ravel(catboost_model.predict(X_test))

    metrics = calculate_metrics(y_test, y_pred)
    for metric, value in metrics.items():
        print(f'{metric}: {value:.3f}')
    return metrics

# Entry point: run the pipeline only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
