In [None]:
import kagglehub
import os

# Step 1: Download the latest version of the dataset; the returned value is
# the local directory the files were placed in.
dataset_path = kagglehub.dataset_download("tawsifurrahman/covid19-radiography-database")

# Step 2: Point to the folder that contains class subfolders (COVID, Normal, etc.)
data_path = os.path.join(dataset_path, "COVID-19_Radiography_Dataset")

# NOTE(review): nothing in the visible cells reads these two paths, and it is
# not confirmed that the download contains 'train'/'test' folders — verify
# before relying on them.
train_data_path = os.path.join(dataset_path, 'train')
test_data_path = os.path.join(dataset_path, 'test')  # Path to the test data

# Report the download location once.
print("Path to dataset files:", dataset_path)

!pip install -q tensorflow
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
from glob import glob
import numpy as np
import matplotlib.pyplot as plt


# Image generators for this cell.
# Training images are rescaled by 1/255 and randomly sheared, zoomed and
# horizontally flipped; test images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)


def preprocess_image(image, label):
    """Resize an image to 299x299, expand single-channel input to RGB, and divide by 255.

    Args:
        image: Image tensor whose last axis is the channel axis — presumably
            (height, width, channels); TODO confirm against the caller, which
            is not visible in this notebook.
        label: Label associated with the image; returned unchanged.

    Returns:
        A ``(image, label)`` tuple containing the processed image.
    """
    # Imported locally because this cell never binds `tf`; without this the
    # function raises NameError unless a later cell happened to import it.
    import tensorflow as tf

    image = tf.image.resize(image, (299, 299))  # resize
    if image.shape[-1] == 1:
        image = tf.image.grayscale_to_rgb(image)  # (299, 299, 1) → (299, 299, 3)
    image = image / 255.0  # normalize
    return image, label

# Step 3: Define ImageDataGenerators
# Training generator: rescale plus random shear/zoom/horizontal-flip augmentation.
datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 20% for validation
)

# Validation generator: rescale only, so validation metrics are measured on
# un-augmented images. It uses the same validation_split so that
# subset='validation' refers to the same 20% held out from training.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# NOTE(review): flow_from_directory scans each class folder recursively. If
# the class folders hold both image and mask subfolders, the masks would be
# loaded as samples too — verify the reported image count against the dataset.

# Step 4: Training data
train_data = datagen.flow_from_directory(
    data_path,
    target_size=(299, 299),
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

# Step 5: Validation data
val_data = val_datagen.flow_from_directory(
    data_path,
    target_size=(299, 299),
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)

Path to dataset files: /kaggle/input/covid19-radiography-database
Path to dataset files: /kaggle/input/covid19-radiography-database
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m644.9/644.9 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.5/57.5 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.5/24.5 MB[0m [31m85.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m124.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m125.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.6/6.6 MB[0m [31m123.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import kagglehub
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.optimizers import Adam

# Step 1: Fetch the dataset through kagglehub, then build the path of the
# folder that is handed to the image generators below.
print("\nDownloading dataset...")
dataset_path = kagglehub.dataset_download(
    "tawsifurrahman/covid19-radiography-database"
)
data_path = os.path.join(
    dataset_path,
    "COVID-19_Radiography_Dataset",
)

# Step 2: Data generators with preprocessing
img_size = (299, 299)

# Training stream: rescale by 1/255 plus random shear/zoom/horizontal flip.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2
)

# Validation stream: rescale only. Drawing the validation subset from the
# augmenting generator would randomly distort validation images and make
# val_loss / val_accuracy noisy. The same validation_split is used so that
# subset='validation' refers to the same 20% held out from training.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# NOTE(review): flow_from_directory scans each class folder recursively. If
# the class folders hold both image and mask subfolders, the masks would be
# loaded as samples too — verify the "Found N images" count against the
# dataset's documented size.
train_data = train_datagen.flow_from_directory(
    data_path,
    target_size=img_size,
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

val_data = val_datagen.flow_from_directory(
    data_path,
    target_size=img_size,
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)

# Step 3: Load MobileNetV2 base model
print("\nLoading MobileNetV2 base model...")
# The input shape is derived from img_size so the network always matches the
# generators' target_size instead of repeating the numbers by hand.
# NOTE(review): for sizes other than the ones ImageNet weights are published
# for, Keras appears to warn and fall back to the 224x224 weights — confirm
# that 299x299 is intended.
# NOTE(review): the generators feed pixels scaled by 1/255; the pretrained
# MobileNetV2 weights presumably expect `mobilenet_v2.preprocess_input`
# scaling — if this is changed, training and inference preprocessing must be
# changed together.
base_model = MobileNetV2(input_shape=(*img_size, 3), include_top=False, weights='imagenet')
base_model.trainable = False  # freeze the backbone; only the new head is trained

# Classification head: global average pooling -> dropout -> softmax.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
# One output unit per class folder found by the generator, rather than a
# hard-coded 4, so the head stays correct if the class set changes.
predictions = Dense(train_data.num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# Step 4: Compile with Adam (learning rate 1e-4), categorical cross-entropy
# loss, and accuracy as the reported metric.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Step 5: Train for 25 epochs; each epoch draws 50 training batches and
# evaluates on 10 validation batches.
print("\nTraining model...")
history = model.fit(
    train_data,
    epochs=25,
    steps_per_epoch=50,
    validation_data=val_data,
    validation_steps=10,
)


Downloading dataset...
Found 33866 images belonging to 4 classes.
Found 8464 images belonging to 4 classes.

Loading MobileNetV2 base model...


  base_model = MobileNetV2(input_shape=(299, 299, 3), include_top=False, weights='imagenet')


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step

Training model...


  self._warn_if_super_not_called()


Epoch 1/25
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 2s/step - accuracy: 0.2585 - loss: 1.7761 - val_accuracy: 0.4469 - val_loss: 1.2253
Epoch 2/25
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 3s/step - accuracy: 0.3599 - loss: 1.4887 - val_accuracy: 0.5094 - val_loss: 1.1202
Epoch 3/25
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 3s/step - accuracy: 0.4187 - loss: 1.3223 - val_accuracy: 0.5344 - val_loss: 1.0743
Epoch 4/25
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 3s/step - accuracy: 0.4386 - loss: 1.3563 - val_accuracy: 0.5906 - val_loss: 1.0225
Epoch 5/25
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 2s/step - accuracy: 0.4564 - loss: 1.2820 - val_accuracy: 0.5906 - val_loss: 1.0013
Epoch 6/25
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 2s/step - accuracy: 0.4500 - loss: 1.3313 - val_accuracy: 0.6438 - val_loss: 0.9167
Epoch 7/25
[1m50/50[0m [32m━━━━



[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 621ms/step - accuracy: 0.5983 - loss: 0.9786 - val_accuracy: 0.6844 - val_loss: 0.8136
Epoch 23/25
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 3s/step - accuracy: 0.5593 - loss: 1.0644 - val_accuracy: 0.6844 - val_loss: 0.7806
Epoch 24/25
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 2s/step - accuracy: 0.5851 - loss: 1.0197 - val_accuracy: 0.7000 - val_loss: 0.7671
Epoch 25/25
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 2s/step - accuracy: 0.6038 - loss: 0.9868 - val_accuracy: 0.7406 - val_loss: 0.7563


In [None]:
# Optional: Image preprocessing for deployment

def preprocess_input_image(img_path, target_size=None):
    """Load one image file and return it as a single-item batch scaled by 1/255.

    Args:
        img_path: Path of the image file to load.
        target_size: Optional ``(height, width)`` to resize to. When omitted,
            the notebook-level ``img_size`` is used, so existing one-argument
            calls behave as before.

    Returns:
        NumPy array with a leading batch axis of length 1.
    """
    size = img_size if target_size is None else target_size
    # Request RGB explicitly: load_img then yields three channels even for
    # grayscale files, so no separate grayscale-to-RGB conversion is needed.
    img = load_img(img_path, target_size=size, color_mode="rgb")
    img_array = img_to_array(img) / 255.0
    return np.expand_dims(img_array, axis=0)


In [None]:
# Write the trained model to "covid_classification_model.h5" (relative path,
# so it lands in the kernel's current working directory).
model.save("covid_classification_model.h5")



In [None]:
# Colab-only helper: hand the saved model file to the browser as a download.
# The file name must match the one written by model.save().
from google.colab import files
files.download("covid_classification_model.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>