Radiography
COVID-19 Radiography Database
Dataset Description
The COVID-19 Radiography Database is a collection of chest X-ray images classified into four categories:
- COVID: Images of patients with COVID-19.
- Lung Opacity: Images with lung opacities.
- Normal: Normal chest X-ray images.
- Viral Pneumonia: Images of viral pneumonia.

[https://www.kaggle.com/datasets/tawsifurrahman/covid19-radiography-database/data](https://www.kaggle.com/datasets/tawsifurrahman/covid19-radiography-database/data)

1. Labels and Images

In [None]:
import os
from tensorflow.keras.utils import to_categorical
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Root folder of the dataset; each class name is joined onto this path to
# locate that class's images.
base_path = '/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset'

In [None]:
# Class folder names. A class's position in this list is used as its integer
# label, so the order must not change once labels have been assigned.
categories = ['COVID', 'Lung_Opacity', 'Normal', 'Viral Pneumonia']

In [None]:
# Parallel lists filled while scanning the dataset folders:
# image_paths[i] is the path of an image file and labels[i] its class index.
image_paths = []
labels = []

In [None]:
# Scan every category folder and record each image path together with the
# index of its category (the position of the category in `categories`).
IMAGE_EXTENSIONS = ('.jpg', '.png')

for label, category in enumerate(categories):
    category_dir = os.path.join(base_path, category)
    for root, dirs, files in os.walk(category_dir):
        # os.walk yields entries in arbitrary, filesystem-dependent order.
        # Sorting in place fixes the traversal order so the collected list
        # (and any seeded split made from it) is reproducible between runs.
        dirs.sort()
        # Skip mask folders: any directory whose path contains 'mask'.
        if 'mask' in root.lower():
            continue
        print(root)
        for file in sorted(files):
            # Case-insensitive match so files such as 'X.PNG' are not dropped.
            if file.lower().endswith(IMAGE_EXTENSIONS):
                image_paths.append(os.path.join(root, file))
                labels.append(label)
print(f"Total images found: {len(image_paths)}")
print(f"Total labels assigned: {len(labels)}")
# e.g.
# /kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/COVID
# /kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/Lung_Opacity

In [None]:
# One-hot encode the integer class indices, with one column per entry in
# `categories` (required by the categorical cross-entropy loss used later).
labels = to_categorical(labels, num_classes=len(categories))

In [None]:
from PIL import Image

# Target (width, height) for every image; must match the model's input size.
IMG_SIZE = (128, 128)

images = []
for path in image_paths:
    # The context manager closes the underlying file as soon as the pixels
    # have been read, instead of leaving it to the garbage collector while
    # thousands of images are being loaded.
    with Image.open(path) as img:
        # convert() and resize() both return new in-memory images, so the
        # result stays valid after the source file is closed.
        rgb = img.convert('RGB').resize(IMG_SIZE)
    images.append(np.array(rgb))

**Training and Testing**

In [None]:
# Stack the list of per-image arrays into a single array (each image was
# converted to RGB and resized to 128x128 when it was loaded), and make sure
# the labels are an array as well.
images = np.array(images)
labels = np.array(labels)

# Sanity check: the number of images should equal the first dimension of labels.
print(f'Total number of images : {len(images)}')
print(f'Label size : {labels.shape}')

In [None]:
# Hold out 20% of the data for testing, then 20% of the remainder for
# validation (64% / 16% / 20% overall).
# Both splits are stratified on the class index so every subset keeps the
# same class proportions as the full dataset; an unstratified random split
# can under-represent the smaller classes in the validation/test sets.
# `labels` is one-hot encoded here, so argmax recovers the class index.
train_images, test_images, train_labels, test_labels = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=np.argmax(labels, axis=1),
)
train_images, val_images, train_labels, val_labels = train_test_split(
    train_images,
    train_labels,
    test_size=0.2,
    random_state=42,
    stratify=np.argmax(train_labels, axis=1),
)

In [None]:
# Report how many samples ended up in each subset.
for split_name, split_images in (
    ("Training", train_images),
    ("Validation", val_images),
    ("Test", test_images),
):
    print(f"{split_name} set size: {len(split_images)}")

In [None]:
# Make sure each subset is an ndarray. np.asarray returns its argument
# unchanged when it is already an ndarray, whereas np.array would make a
# full copy of every (large) image array for no benefit.
train_images = np.asarray(train_images)
val_images = np.asarray(val_images)
test_images = np.asarray(test_images)

In [None]:
# Scale pixel values from [0, 255] to [0, 1].
# Cast to float32 first: dividing the integer pixel arrays by 255.0 directly
# promotes them to float64, which takes 8x the memory of the uint8 data and
# twice that of float32, with no benefit to training.
train_images = train_images.astype(np.float32) / 255.0
val_images = val_images.astype(np.float32) / 255.0
test_images = test_images.astype(np.float32) / 255.0

Training and Testing

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Define the model: three Conv2D + MaxPooling2D stages followed by a dense
# classifier head with dropout.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One output unit per class; softmax yields a probability distribution.
    Dense(len(categories), activation='softmax'),
])

# Compile the model. Labels are one-hot encoded, hence categorical_crossentropy.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Define Early Stopping.
# Monitor the validation loss rather than the training accuracy: training
# accuracy keeps improving while the model overfits, so it would neither
# stop training at the right time nor let restore_best_weights pick the
# weights that generalise best.
early = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Model summary
model.summary()

# Train the model with Early Stopping
history = model.fit(
    train_images,
    train_labels,
    validation_data=(val_images, val_labels),
    epochs=60,
    batch_size=32,
    callbacks=[early],
)

In [None]:
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

In [None]:
# Predicted class probabilities for every test image (one column per class).
y_pred_prob = model.predict(test_images, verbose=0)
# Collapse probabilities / one-hot rows to class indices.
y_pred = y_pred_prob.argmax(axis=1)
y_true = test_labels.argmax(axis=1)

In [None]:
# Display the true class indices of the test set as the cell output.
y_true

In [None]:
# Print the heading and the confusion matrix (true classes along the rows,
# predicted classes along the columns) on separate lines.
print("Confusion Matrix:", confusion_matrix(y_true, y_pred), sep="\n")

In [None]:
# Per-class precision, recall and F1.
# With average=None each scorer returns one value per entry of `labels`, so
# every metric is computed once for all classes instead of once per class
# through single-label macro averaging.
class_ids = list(range(len(categories)))
precisions = precision_score(y_true, y_pred, labels=class_ids, average=None)
recalls = recall_score(y_true, y_pred, labels=class_ids, average=None)
f1_values = f1_score(y_true, y_pred, labels=class_ids, average=None)

for i, precision, recall, f1 in zip(class_ids, precisions, recalls, f1_values):
    print(f"Precision Score (Class {i}):", precision)
    print(f"Recall Score (Class {i}):", recall)
    print(f"F1 Score (Class {i}):", f1)

In [None]:
# Overall test-set metrics: accuracy as reported by Keras (the second value
# returned by evaluate, after the loss) and macro-averaged scores.
print()
evaluation = model.evaluate(test_images, test_labels, verbose=0)
print("Accuracy:", evaluation[1])
for metric_name, scorer in (
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1", f1_score),
):
    print(f"{metric_name} Score (Macro):", scorer(y_true, y_pred, average='macro'))

In [None]:
# Plot the training and validation accuracy/loss curves recorded during
# fitting, all on the same axes.
curves = (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
    ('loss', 'Train Loss'),
    ('val_loss', 'Validation Loss'),
)

plt.figure(figsize=(12, 6))
for history_key, curve_label in curves:
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Model Accuracy and Loss')
plt.xlabel('Epoch')
plt.ylabel('Accuracy / Loss')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score
# One-vs-rest ROC curve per class: class i is treated as the positive class
# and its predicted probability as the score.
plt.figure(figsize=(12, 8))
for i in range(len(categories)):
    is_class = y_true == i
    class_scores = y_pred_prob[:, i]
    fpr, tpr, _ = roc_curve(is_class, class_scores)
    auc = roc_auc_score(is_class, class_scores)
    plt.plot(fpr, tpr, label=f'Class {i} (AUC = {auc:.2f})')

# Diagonal reference line: the expected curve of a random classifier.
plt.plot([0, 1], [0, 1], 'r--')
plt.title('ROC CURVE')
# ROC axes are rates (fpr / tpr from roc_curve), not raw counts.
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.grid(True)
plt.show()