In [46]:
import os
import pandas as pd
import sys



Data

In [None]:
# Fetch the CoronaHack chest X-ray dataset through kagglehub.
import kagglehub

# dataset_download returns the location of the downloaded files, kept in `path`.
path = kagglehub.dataset_download("praveengovi/coronahack-chest-xraydataset")

# NOTE(review): later cells read from DATA_DIR rather than `path` — confirm how
# the two are related so the notebook survives Restart & Run All.
print("Path to dataset files:", path)


In [35]:
# Load the per-image metadata table. Later cells read its "Dataset_type",
# "Label" and "X_ray_image_name" columns.
# NOTE(review): DATA_DIR is not assigned in any visible cell; it must be a
# pathlib.Path-like object defined before this cell runs — confirm.
# NOTE(review): the file name carries no ".csv" suffix; verify it matches the
# file on disk.
df = pd.read_csv(DATA_DIR / "Chest_xray_Corona_Metadata")


In [41]:
# Image folders for the two splits, both located directly under DATA_DIR.
train_dir, test_dir = (DATA_DIR / split_name for split_name in ("train", "test"))

In [43]:
# Partition the metadata rows by their "Dataset_type" value; each subset gets a
# fresh 0-based index so positional access works on it independently.
train_labels = df.loc[df["Dataset_type"].eq("TRAIN")].reset_index(drop=True)
test_labels = df.loc[df["Dataset_type"].eq("TEST")].reset_index(drop=True)

Exploratory Data Analysis

In [None]:
import matplotlib.pyplot as plt

# Number of metadata rows per value of "Label", across all splits.
class_counts = df['Label'].value_counts()

# Draw on an explicitly created Axes so the chart never lands on a leftover figure.
fig, ax = plt.subplots(figsize=(5, 4))
class_counts.plot(kind='bar', ax=ax, rot=0)  # rot=0 keeps the class names horizontal
ax.set_title("Class Distribution (Normal vs COVID)")
ax.set_xlabel("Class")
ax.set_ylabel("Number of Images")
plt.show()


In [None]:
# Row counts per (split, class) pair, with the class labels pivoted into columns.
split_counts = df.groupby(['Dataset_type', 'Label']).size().unstack()

# One group of bars per split; rot=0 keeps the split names horizontal.
ax = split_counts.plot(kind='bar', figsize=(6, 4), rot=0)
ax.set_title("Train/Test Class Distribution")
ax.set_xlabel("Dataset Split")
ax.set_ylabel("Number of Images")
plt.show()


In [39]:
import os
import cv2
import random
import matplotlib.pyplot as plt

def show_sample_images(df, image_dir, label_name, n=4, random_state=None):
    """Display up to ``n`` randomly chosen images of one class in a single row.

    Parameters
    ----------
    df : pandas.DataFrame
        Metadata table with a ``Label`` column and an ``X_ray_image_name``
        column holding file names relative to ``image_dir``.
    image_dir : str or os.PathLike
        Directory that contains the image files.
    label_name : str
        Value of ``Label`` to sample from; also used as every panel's title.
    n : int, default 4
        Number of images requested. When fewer rows carry ``label_name``,
        all of them are shown instead of failing.
    random_state : int or None, default None
        Forwarded to ``DataFrame.sample``; pass an int for a reproducible pick.

    Raises
    ------
    ValueError
        If ``n`` is not positive, or no row of ``df`` has ``label_name``.
    FileNotFoundError
        If a sampled image is missing or cannot be decoded by ``cv2.imread``.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    candidates = df[df['Label'] == label_name]
    if candidates.empty:
        raise ValueError(f"No rows with Label == {label_name!r} to sample from")

    # Never request more rows than exist; DataFrame.sample would raise otherwise.
    count = min(n, len(candidates))
    samples = candidates.sample(count, random_state=random_state)

    # squeeze=False keeps `axes` two-dimensional even when count == 1.
    fig, axes = plt.subplots(1, count, figsize=(10, 4), squeeze=False)
    for ax, image_name in zip(axes[0], samples['X_ray_image_name']):
        img_path = os.path.join(image_dir, image_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            plt.close(fig)
            raise FileNotFoundError(
                f"Image is missing or unreadable: {img_path}"
            )

        # OpenCV loads pixels as BGR; matplotlib expects RGB.
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.axis('off')
        ax.set_title(label_name)

    plt.show()


In [None]:
# Preview one randomly chosen training image labelled "Normal".
show_sample_images(df=train_labels, image_dir=train_dir, label_name="Normal", n=1)




In [None]:
# Preview two randomly chosen training images from the pneumonia class.
# The dataset itself spells this label "Pnemonia", so the misspelling is required.
show_sample_images(df=train_labels, image_dir=train_dir, label_name="Pnemonia", n=2)

Training

In [22]:

# Project-local helpers from the `src` package, each called by a later cell.
# NOTE(review): `src` must be importable from the notebook's working
# directory — confirm where the notebook is expected to be launched from.
from src.data import create_generators
from src.model import build_model
from src.train import train_frozen, fine_tune
from src.evaluate import evaluate

# Reached only when every import above succeeded.
print("Imports successful")


Imports successful


In [None]:
# Create the three data generators from the metadata file and the image
# folders, using batches of 32.
train_gen, val_gen, test_gen = create_generators(
    csv_path=DATA_DIR / "Chest_xray_Corona_Metadata",
    train_dir=train_dir,
    test_dir=test_dir,
    batch_size=32,
)


In [None]:
# build_model() returns two objects: the model that is trained and evaluated
# below, and the base model that fine_tune() later receives separately.
model, base_model = build_model()
# Show the model's summary as the cell output.
model.summary()


In [None]:
# First training stage: 6 epochs via train_frozen, keeping whatever it returns
# for later inspection.
# NOTE(review): presumably the base model stays frozen here — confirm in src.train.
history_frozen = train_frozen(model, train_gen, val_gen, epochs=6)


In [None]:
# Second training stage: 8 epochs via fine_tune, which also receives the base
# model returned by build_model().
history_finetune = fine_tune(model, base_model, train_gen, val_gen, epochs=8)


Evaluation

In [None]:
# evaluate() returns a pair, unpacked here as (test_loss, test_acc), for the
# test generator; only the accuracy is reported.
test_loss, test_acc = evaluate(model, test_gen)
print("Final Test Accuracy:", test_acc)
