In [6]:
!pip install kaggle
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split



In [7]:
from google.colab import files
files.upload()  # Upload your kaggle.json

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download Dataset
!kaggle datasets download -d kmader/finding-lungs-in-ct-data
!unzip finding-lungs-in-ct-data.zip -d lung_segmentation

Saving kaggle.json to kaggle (3).json
Dataset URL: https://www.kaggle.com/datasets/kmader/finding-lungs-in-ct-data
License(s): unknown
Archive:  finding-lungs-in-ct-data.zip
  inflating: lung_segmentation/2d_images.zip  
  inflating: lung_segmentation/2d_images/ID_0000_Z_0142.tif  
  inflating: lung_segmentation/2d_images/ID_0001_Z_0146.tif  
  inflating: lung_segmentation/2d_images/ID_0002_Z_0162.tif  
  inflating: lung_segmentation/2d_images/ID_0003_Z_0132.tif  
  inflating: lung_segmentation/2d_images/ID_0004_Z_0066.tif  
  inflating: lung_segmentation/2d_images/ID_0005_Z_0066.tif  
  inflating: lung_segmentation/2d_images/ID_0006_Z_0206.tif  
  inflating: lung_segmentation/2d_images/ID_0007_Z_0211.tif  
  inflating: lung_segmentation/2d_images/ID_0008_Z_0065.tif  
  inflating: lung_segmentation/2d_images/ID_0009_Z_0114.tif  
  inflating: lung_segmentation/2d_images/ID_0010_Z_0259.tif  
  inflating: lung_segmentation/2d_images/ID_0011_Z_0156.tif  
  inflating: lung_segmentation/2d_i

In [14]:
!unzip finding-lungs-in-ct-data.zip -d lung_segmentation

Archive:  finding-lungs-in-ct-data.zip
replace lung_segmentation/2d_images.zip? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [13]:
import os

for root, dirs, files in os.walk("lung_segmentation"):
    print(f"📁 {root}")
    for file in files[:5]:  # only list first 5 files to reduce output
        print("    └──", file)


📁 lung_segmentation
    └── 2d_images.zip
    └── 3d_images.zip
    └── lung_stats.csv
    └── 2d_masks.zip
📁 lung_segmentation/2d_masks
    └── ID_0139_Z_0056.tif
    └── ID_0234_Z_0096.tif
    └── ID_0030_Z_0302.tif
    └── ID_0090_Z_0119.tif
    └── ID_0183_Z_0242.tif
📁 lung_segmentation/3d_images
    └── IMG_0059.nii.gz
    └── IMG_0002.nii.gz
    └── MASK_0059.nii.gz
    └── MASK_0031.nii.gz
    └── MASK_0078.nii.gz
📁 lung_segmentation/2d_images
    └── ID_0139_Z_0056.tif
    └── ID_0234_Z_0096.tif
    └── ID_0030_Z_0302.tif
    └── ID_0090_Z_0119.tif
    └── ID_0183_Z_0242.tif


In [16]:
image_dir = "lung_segmentation/finding-lungs-in-ct-data/Lung Segmentation/images"
mask_dir = "lung_segmentation/finding-lungs-in-ct-data/Lung Segmentation/masks"

In [17]:
from glob import glob

image_paths = sorted(glob("lung_segmentation/**/*.tif", recursive=True))
print(image_paths[:5])

['lung_segmentation/2d_images/ID_0000_Z_0142.tif', 'lung_segmentation/2d_images/ID_0001_Z_0146.tif', 'lung_segmentation/2d_images/ID_0002_Z_0162.tif', 'lung_segmentation/2d_images/ID_0003_Z_0132.tif', 'lung_segmentation/2d_images/ID_0004_Z_0066.tif']


In [19]:
import os

for root, dirs, files in os.walk("lung_segmentation"):
    print(f"📁 {root}")
    for file in files[:5]:
        print("   └──", file)

📁 lung_segmentation
   └── 2d_images.zip
   └── 3d_images.zip
   └── lung_stats.csv
   └── 2d_masks.zip
📁 lung_segmentation/2d_masks
   └── ID_0139_Z_0056.tif
   └── ID_0234_Z_0096.tif
   └── ID_0030_Z_0302.tif
   └── ID_0090_Z_0119.tif
   └── ID_0183_Z_0242.tif
📁 lung_segmentation/3d_images
   └── IMG_0059.nii.gz
   └── IMG_0002.nii.gz
   └── MASK_0059.nii.gz
   └── MASK_0031.nii.gz
   └── MASK_0078.nii.gz
📁 lung_segmentation/2d_images
   └── ID_0139_Z_0056.tif
   └── ID_0234_Z_0096.tif
   └── ID_0030_Z_0302.tif
   └── ID_0090_Z_0119.tif
   └── ID_0183_Z_0242.tif


In [20]:
import cv2
import numpy as np
import os
from glob import glob

# This is the typical extracted structure
base_dir = "lung_segmentation/2D_lung_segmentation"

image_paths = sorted(glob(os.path.join(base_dir, "images", "*.tif")))
mask_paths = sorted(glob(os.path.join(base_dir, "masks", "*.tif")))

print(f"Found {len(image_paths)} images and {len(mask_paths)} masks")

images, masks = [], []

for img_path, mask_path in zip(image_paths, mask_paths):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    if img is None or mask is None:
        continue  # Skip if either image or mask failed to load

    img = cv2.resize(img, (128, 128)) / 255.0
    mask = cv2.resize(mask, (128, 128)) / 255.0

    images.append(img)
    masks.append(mask)

# Convert to arrays
X = np.expand_dims(np.array(images), -1)
y = np.expand_dims(np.array(masks), -1)

print(f"Final dataset shape: X={X.shape}, y={y.shape}")

Found 0 images and 0 masks
Final dataset shape: X=(0, 1), y=(0, 1)


In [22]:
import os

for root, dirs, files in os.walk("lung_segmentation"):
    print(f"📁 {root}")
    for file in files[:5]:  # just show first 5 files per folder
        print("   └──", file)

📁 lung_segmentation
   └── 2d_images.zip
   └── 3d_images.zip
   └── lung_stats.csv
   └── 2d_masks.zip
📁 lung_segmentation/2d_masks
   └── ID_0139_Z_0056.tif
   └── ID_0234_Z_0096.tif
   └── ID_0030_Z_0302.tif
   └── ID_0090_Z_0119.tif
   └── ID_0183_Z_0242.tif
📁 lung_segmentation/3d_images
   └── IMG_0059.nii.gz
   └── IMG_0002.nii.gz
   └── MASK_0059.nii.gz
   └── MASK_0031.nii.gz
   └── MASK_0078.nii.gz
📁 lung_segmentation/2d_images
   └── ID_0139_Z_0056.tif
   └── ID_0234_Z_0096.tif
   └── ID_0030_Z_0302.tif
   └── ID_0090_Z_0119.tif
   └── ID_0183_Z_0242.tif


In [24]:
print("Number of images loaded:", len(X))
print("Number of masks loaded:", len(y))

Number of images loaded: 0
Number of masks loaded: 0


In [25]:
import os

for root, dirs, files in os.walk("lung_segmentation"):
    print(f"📁 {root}")
    for file in files:
        print("   └──", file)


📁 lung_segmentation
   └── 2d_images.zip
   └── 3d_images.zip
   └── lung_stats.csv
   └── 2d_masks.zip
📁 lung_segmentation/2d_masks
   └── ID_0139_Z_0056.tif
   └── ID_0234_Z_0096.tif
   └── ID_0030_Z_0302.tif
   └── ID_0090_Z_0119.tif
   └── ID_0183_Z_0242.tif
   └── ID_0093_Z_0123.tif
   └── ID_0108_Z_0259.tif
   └── ID_0239_Z_0365.tif
   └── ID_0003_Z_0132.tif
   └── ID_0248_Z_0062.tif
   └── ID_0133_Z_0070.tif
   └── ID_0037_Z_0150.tif
   └── ID_0063_Z_0067.tif
   └── ID_0240_Z_0066.tif
   └── ID_0184_Z_0089.tif
   └── ID_0228_Z_0097.tif
   └── ID_0112_Z_0085.tif
   └── ID_0212_Z_0064.tif
   └── ID_0078_Z_0058.tif
   └── ID_0122_Z_0066.tif
   └── ID_0017_Z_0061.tif
   └── ID_0134_Z_0137.tif
   └── ID_0154_Z_0070.tif
   └── ID_0080_Z_0213.tif
   └── ID_0056_Z_0163.tif
   └── ID_0041_Z_0336.tif
   └── ID_0166_Z_0072.tif
   └── ID_0082_Z_0278.tif
   └── ID_0263_Z_0093.tif
   └── ID_0187_Z_0145.tif
   └── ID_0055_Z_0122.tif
   └── ID_0025_Z_0112.tif
   └── ID_0053_Z_0127.tif
   └── ID

In [26]:
base_dir = "/content/finding-lungs-in-ct-data/2D_lung_segmentation"

In [27]:
import os, cv2, numpy as np
from glob import glob

base_dir = "lung_segmentation/2D_lung_segmentation"
image_paths = sorted(glob(os.path.join(base_dir, "images", "*.tif")))
mask_paths = sorted(glob(os.path.join(base_dir, "masks", "*.tif")))

print("🖼️ Total images:", len(image_paths))
print("🎭 Total masks:", len(mask_paths))

images, masks = [], []
for img_path, mask_path in zip(image_paths, mask_paths):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    if img is None or mask is None:
        continue

    img = cv2.resize(img, (128, 128)) / 255.0
    mask = cv2.resize(mask, (128, 128)) / 255.0

    images.append(img)
    masks.append(mask)

X = np.expand_dims(np.array(images), -1)
y = np.expand_dims(np.array(masks), -1)

print("✅ X shape:", X.shape)
print("✅ y shape:", y.shape)


🖼️ Total images: 0
🎭 Total masks: 0
✅ X shape: (0, 1)
✅ y shape: (0, 1)


In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [29]:
def unet_model(input_size=(128,128,1)):
    inputs = Input(input_size)

    # Encoding
    c1 = Conv2D(16, 3, activation='relu', padding='same')(inputs)
    c1 = Conv2D(16, 3, activation='relu', padding='same')(c1)
    p1 = MaxPooling2D()(c1)

    c2 = Conv2D(32, 3, activation='relu', padding='same')(p1)
    c2 = Conv2D(32, 3, activation='relu', padding='same')(c2)
    p2 = MaxPooling2D()(c2)

    c3 = Conv2D(64, 3, activation='relu', padding='same')(p2)
    c3 = Conv2D(64, 3, activation='relu', padding='same')(c3)

    # Decoding
    u1 = UpSampling2D()(c3)
    u1 = concatenate([u1, c2])
    c4 = Conv2D(32, 3, activation='relu', padding='same')(u1)
    c4 = Conv2D(32, 3, activation='relu', padding='same')(c4)

    u2 = UpSampling2D()(c4)
    u2 = concatenate([u2, c1])
    c5 = Conv2D(16, 3, activation='relu', padding='same')(u2)
    c5 = Conv2D(16, 3, activation='relu', padding='same')(c5)

    outputs = Conv2D(1, 1, activation='sigmoid')(c5)

    return Model(inputs, outputs)

model = unet_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
history = model.fit(X_train, y_train, validation_split=0.1, epochs=10, batch_size=8)

In [None]:
# Predict on test images
preds = model.predict(X_test)

# Visualize predictions
n = 5
plt.figure(figsize=(12, 12))
for i in range(n):
    plt.subplot(n, 3, i*3 + 1)
    plt.title('Input')
    plt.imshow(X_test[i].squeeze(), cmap='gray')

    plt.subplot(n, 3, i*3 + 2)
    plt.title('Ground Truth')
    plt.imshow(y_test[i].squeeze(), cmap='gray')

    plt.subplot(n, 3, i*3 + 3)
    plt.title('Prediction')
    plt.imshow(preds[i].squeeze(), cmap='gray')
plt.tight_layout()
plt.show()