CODE FOR TRAINING THE MODEL

LIBRARY IMPORTS

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout

LOADING DATA

In [2]:
data_dirs = {
    "URTI": r"C:\Users\Jiary\Documents\GitHub\ML\Spectograms\URTI",
    "Healthy": r"C:\Users\Jiary\Documents\GitHub\ML\Spectograms\Healthy",
    "COPD": r"C:\Users\Jiary\Documents\GitHub\ML\Spectograms\COPD",
    "Bronchiectasis": r"C:\Users\Jiary\Documents\GitHub\ML\Spectograms\Bronchiectasis",
    "Pneumonia": r"C:\Users\Jiary\Documents\GitHub\ML\Spectograms\Pneumonia",
    "Bronchiolitis": r"C:\Users\Jiary\Documents\GitHub\ML\Spectograms\Bronchiolitis"
}

for disease, path in data_dirs.items():
    print(f"Loaded {disease} data directory: {path}")

# Load data and labels
X = []  # Spectrograms
y = []  # Labels

for label, folder_path in data_dirs.items():
    for file_name in os.listdir(folder_path):
        if file_name.endswith(".npy"):
            file_path = os.path.join(folder_path, file_name)
            data = np.load(file_path)  # Shape: (128, 128, 1)
            X.append(data)
            y.append(label)

# Convert to numpy arrays
X = np.array(X)  # Shape: (n_samples, 128, 128, 1)
y = np.array(y)  # Shape: (n_samples,)

Loaded URTI data directory: C:\Users\Jiary\Documents\GitHub\ML\Spectograms\URTI
Loaded Healthy data directory: C:\Users\Jiary\Documents\GitHub\ML\Spectograms\Healthy
Loaded COPD data directory: C:\Users\Jiary\Documents\GitHub\ML\Spectograms\COPD
Loaded Bronchiectasis data directory: C:\Users\Jiary\Documents\GitHub\ML\Spectograms\Bronchiectasis
Loaded Pneumonia data directory: C:\Users\Jiary\Documents\GitHub\ML\Spectograms\Pneumonia
Loaded Bronchiolitis data directory: C:\Users\Jiary\Documents\GitHub\ML\Spectograms\Bronchiolitis


ENCODING LABELS

In [3]:
 # Convert labels to integers
label_encoder = LabelEncoder()
label_encoder.fit(y)  # Fit on all labels in the dataset
# Step 2: Encode the labels
y_encoded = label_encoder.transform(y)
# Example mapping
print(f'Total spectrograms: {len(X)}')
print("Label mapping:", dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))))

Total spectrograms: 8825
Label mapping: {'Bronchiectasis': 0, 'Bronchiolitis': 1, 'COPD': 2, 'Healthy': 3, 'Pneumonia': 4, 'URTI': 5}


SPLIT TRAIN-EVALUATE-TEST DATA

In [4]:
# Stratified split into training (70%) and temporary set (30%)
X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)
# Print sizes
print(f"Training set: {X_train.shape[0]} samples")
print(f"Validation set: {X_val.shape[0]} samples")
print(f"Testing set: {X_test.shape[0]} samples\n\n")
# Function to count samples per class
def count_samples(y_encoded, label_encoder):
    unique, counts = np.unique(y_encoded, return_counts=True)
    for cls, count in zip(unique, counts):
        print(f"Class: {label_encoder.inverse_transform([cls])[0]}, Samples: {count}")

# Print distribution in training set
print("Training Set Distribution:")
count_samples(y_train, label_encoder)

# Print distribution in validation set
print("\nValidation Set Distribution:")
count_samples(y_val, label_encoder)

# Print distribution in testing set
print("\nTesting Set Distribution:")
count_samples(y_test, label_encoder)


Training set: 6177 samples
Validation set: 1324 samples
Testing set: 1324 samples


Training Set Distribution:
Class: Bronchiectasis, Samples: 349
Class: Bronchiolitis, Samples: 364
Class: COPD, Samples: 4022
Class: Healthy, Samples: 498
Class: Pneumonia, Samples: 490
Class: URTI, Samples: 454

Validation Set Distribution:
Class: Bronchiectasis, Samples: 75
Class: Bronchiolitis, Samples: 78
Class: COPD, Samples: 862
Class: Healthy, Samples: 107
Class: Pneumonia, Samples: 105
Class: URTI, Samples: 97

Testing Set Distribution:
Class: Bronchiectasis, Samples: 75
Class: Bronchiolitis, Samples: 78
Class: COPD, Samples: 862
Class: Healthy, Samples: 107
Class: Pneumonia, Samples: 105
Class: URTI, Samples: 97


CNN-MODEL

In [5]:
# Define the CNN model
model = Sequential([
    # Convolutional layer
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    MaxPooling2D((2, 2)),
    
    # Second convolutional layer
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    
    # Third convolutional layer
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    
    # Flatten the output
    Flatten(),
    
    # Fully connected layers
    Dense(128, activation='relu'),
    Dropout(0.5),  # Dropout for regularization
    Dense(64, activation='relu'),
    
    # Output layer (6 classes for your dataset)
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()
print(f"X_train dtype: {X_train.dtype}")
print(f"y_train dtype: {y_train.dtype}")
print(f"X_test dtype: {X_test.dtype}")
print(f"y_test dtype: {y_test.dtype}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


X_train dtype: float32
y_train dtype: int32
X_test dtype: float32
y_test dtype: int32


TRAIN MODEL

In [20]:
history = model.fit(X_train, y_train, epochs=25, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/25
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 896ms/step - accuracy: 0.8940 - loss: 0.2723 - val_accuracy: 0.8074 - val_loss: 0.7396
Epoch 2/25
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m231s[0m 1s/step - accuracy: 0.7503 - loss: 0.8136 - val_accuracy: 0.6760 - val_loss: 0.8329
Epoch 3/25
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 965ms/step - accuracy: 0.7110 - loss: 0.7877 - val_accuracy: 0.7644 - val_loss: 0.6162
Epoch 4/25
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 945ms/step - accuracy: 0.8168 - loss: 0.4864 - val_accuracy: 0.8482 - val_loss: 0.4610
Epoch 5/25
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 864ms/step - accuracy: 0.8735 - loss: 0.3364 - val_accuracy: 0.8671 - val_loss: 0.4114
Epoch 6/25
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 720ms/step - accuracy: 0.8964 - loss: 0.2787 - val_accuracy: 0.8716 - val_loss: 0.4298
Epoch 7

EVALUATE

In [21]:
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy}')

[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 161ms/step - accuracy: 0.8696 - loss: 0.5846
Test Accuracy: 0.8572507500648499


PLOTS

In [9]:
# Generate classification report
print(classification_report(y_test, y_pred_classes, target_names=label_encoder.classes_))
# Predict on the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Generate confusion matrix
cm = confusion_matrix(y_test, y_pred_classes)

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()
# Plot training & validation accuracy values
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()


NameError: name 'y_pred_classes' is not defined

SAVE MODEL

In [17]:
model.save('respiratory_cnn_model.h5')



In [18]:
import os

# Get the absolute path of the saved model
model_path = os.path.abspath('respiratory_cnn_model.h5')
print(f"The model is saved at: {model_path}")

The model is saved at: c:\Users\Jiary\Documents\GitHub\ML\Project\respiratory_cnn_model.h5
