In [88]:
import os
import glob
import numpy as np
import pandas as pd
import tensorflow as tf
import albumentations as A
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [89]:
# Locations of the contest data inside the Kaggle input mount.
dataset_root = '/kaggle/input/bsu-dataset/ysda-bsu-2023-cnn-contest'
xray_root = f'{dataset_root}/chest_xray/chest_xray'

train_dir = f'{xray_root}/train'
test_dir = f'{xray_root}/test'
submission_file = f'{dataset_root}/sample_submission.csv'

In [91]:
# Spatial size every image is resized to, and the mini-batch size used by the
# training generator.
img_size = (224, 224)
batch_size = 32

# Training-time augmentation: pixel rescaling plus random flips, rotation and zoom.
train_gen = ImageDataGenerator(
    rescale=1/255.,
    horizontal_flip=True,
    # NOTE(review): the data directory is named chest_xray; upside-down images are
    # presumably not present at test time -- confirm vertical flips are wanted.
    vertical_flip=True,
    # rotation_range is expressed in DEGREES, not as a fraction. The previous
    # value of 0.4 meant "rotate by at most 0.4 degrees", i.e. effectively no
    # rotation augmentation at all.
    rotation_range=15,
    zoom_range=0.4
)

In [95]:
# Stream augmented training batches from the class sub-directories of train_dir.
# class_mode='categorical' (the Keras default) yields one-hot labels, which is
# what the 2-unit softmax head and categorical cross-entropy loss expect.
train_generator = train_gen.flow_from_directory(
    directory=train_dir,
    target_size=img_size,
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
)

Found 5232 images belonging to 2 classes.


In [96]:
# Plain CNN classifier: four (3x3 conv + 2x2 max-pool) stages, then a
# three-layer dense head ending in a 2-way softmax.
model = Sequential([
    # Convolutional feature extractor.
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    # Dense classifier head.
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(2, activation='softmax'),
])

In [97]:
# Training configuration: Adam optimiser with default settings, categorical
# cross-entropy (labels from the generator are one-hot), accuracy as the metric.
compile_options = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

# Fit on the augmented training stream only; no validation data is passed here.
n_epochs = 20
history = model.fit(train_generator, epochs=n_epochs)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [98]:
def preprocess_image(image):
    """Return `image` with every value divided by 255.0.

    Mirrors the `rescale=1/255.` step applied to the training images so that
    test inputs are on the same scale.
    """
    return image / 255.0

def load_image(path):
    """Read the JPEG file at `path` and return it resized to `img_size`.

    The file is decoded with three channels and resized with
    `tf.image.resize`; pixel rescaling is done separately by
    `preprocess_image`.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.image.resize(decoded, img_size)

# Collect the test images. glob returns matches in arbitrary (filesystem) order,
# so sort them to make the row order of the predictions reproducible across runs.
image_paths = sorted(glob.glob(os.path.join(test_dir, "*.jpeg")))
if not image_paths:
    # Fail early with a clear message; otherwise the pipeline below dies with an
    # opaque TensorFlow error when it is built from an empty list.
    raise FileNotFoundError(f"No '*.jpeg' files found in {test_dir!r}")

# Load and rescale every image; the dataset keeps the same order as image_paths,
# which the CSV-writing cell relies on to pair file names with predictions.
dataset = tf.data.Dataset.from_tensor_slices(image_paths)
image_dataset = dataset.map(load_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
image_dataset = image_dataset.map(preprocess_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Materialise the whole test set as one tensor with the images stacked along axis 0.
image_array = tf.stack(list(image_dataset), axis=0)

In [99]:
# Run the trained model over the stacked test images; each output row holds the
# two softmax probabilities for the corresponding entry of image_paths.
predictions = model.predict(x=image_array)
predictions



array([[4.8155853e-01, 5.1844150e-01],
       [6.1896563e-01, 3.8103443e-01],
       [1.9467810e-01, 8.0532187e-01],
       ...,
       [9.9575859e-01, 4.2413515e-03],
       [9.9736100e-01, 2.6389707e-03],
       [9.9948752e-01, 5.1250897e-04]], dtype=float32)

In [79]:
# Write the predictions as a CSV file for upload to Kaggle: one row per test
# image, keyed by "test/<file name>", with the softmax output at index 1 as the
# probability. NOTE(review): index 1 is presumably the positive class -- confirm
# against train_generator.class_indices.
with open('pred_results.csv', 'w') as out:
    out.write('filename,probability\n')
    for path, probs in zip(image_paths, predictions):
        out.write(f"test/{os.path.basename(path)},{probs[1]}\n")