In [3]:
# Install required libraries
!pip install numpy pandas scikit-learn matplotlib seaborn tensorflow keras

# Import common ML libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras




In [2]:
# Set paths for data directories
train_ct_path = r"X:\Cursor\LiTS_Venv\LiTS(train_test)\train_CT"
train_mask_path = r"X:\Cursor\LiTS_Venv\LiTS(train_test)\train_mask"
test_ct_path = r"X:\Cursor\LiTS_Venv\LiTS(train_test)\test_CT" 
test_mask_path = r"X:\Cursor\LiTS_Venv\LiTS(train_test)\test_mask"

import numpy as np
import os
import nibabel as nib
from tqdm import tqdm
import cv2

def process_nifti_files(ct_path, mask_path, output_path, is_training=True):
    """
    Process .nii files and convert them to normalized numpy arrays
    """
    # Create output directories if they don't exist
    os.makedirs(output_path, exist_ok=True)
    ct_output = os.path.join(output_path, 'ct_scans')
    mask_output = os.path.join(output_path, 'masks')
    os.makedirs(ct_output, exist_ok=True)
    os.makedirs(mask_output, exist_ok=True)

    # Get list of files
    ct_files = sorted([f for f in os.listdir(ct_path) if f.endswith('.nii')])
    
    print(f"Processing {'training' if is_training else 'testing'} data...")
    
    for ct_file in tqdm(ct_files):
        try:
            # Load CT scan
            ct_nifti = nib.load(os.path.join(ct_path, ct_file))
            ct_data = ct_nifti.get_fdata()
            
            # Load corresponding mask
            mask_file = f"segmentation-{ct_file.split('-')[1]}"
            mask_nifti = nib.load(os.path.join(mask_path, mask_file))
            mask_data = mask_nifti.get_fdata()
            
            # Normalize CT data to 0-1 range
            ct_data = (ct_data - ct_data.min()) / (ct_data.max() - ct_data.min())
            
            # Convert mask to binary (0 or 1)
            mask_data = (mask_data > 0).astype(np.float32)
            
            # Save each slice as a separate file
            for slice_idx in range(ct_data.shape[2]):
                ct_slice = ct_data[:,:,slice_idx]
                mask_slice = mask_data[:,:,slice_idx]
                
                # Resize to standard size (e.g., 256x256)
                ct_slice = cv2.resize(ct_slice, (256, 256))
                mask_slice = cv2.resize(mask_slice, (256, 256))
                
                # Save as numpy arrays
                slice_name = f"{ct_file.split('.')[0]}_slice_{slice_idx}"
                np.save(os.path.join(ct_output, f"{slice_name}.npy"), ct_slice)
                np.save(os.path.join(mask_output, f"{slice_name}.npy"), mask_slice)
                
        except Exception as e:
            print(f"Error processing {ct_file}: {str(e)}")

# Process training data
train_output = "processed_data/train"
process_nifti_files(train_ct_path, train_mask_path, train_output, is_training=True)

# Process testing data
test_output = "processed_data/test"
process_nifti_files(test_ct_path, test_mask_path, test_output, is_training=False)

print("\nData processing complete!")

# Create data generators for training
def create_data_generator(data_path, batch_size=32):
    ct_path = os.path.join(data_path, 'ct_scans')
    mask_path = os.path.join(data_path, 'masks')
    
    ct_files = sorted([f for f in os.listdir(ct_path) if f.endswith('.npy')])
    
    while True:
        # Shuffle files at the start of each epoch
        np.random.shuffle(ct_files)
        
        for i in range(0, len(ct_files), batch_size):
            batch_files = ct_files[i:i + batch_size]
            
            ct_batch = []
            mask_batch = []
            
            for f in batch_files:
                ct = np.load(os.path.join(ct_path, f))
                mask = np.load(os.path.join(mask_path, f))
                
                ct_batch.append(ct)
                mask_batch.append(mask)
            
            ct_batch = np.array(ct_batch)
            mask_batch = np.array(mask_batch)
            
            # Add channel dimension if needed
            ct_batch = np.expand_dims(ct_batch, -1)
            mask_batch = np.expand_dims(mask_batch, -1)
            
            yield ct_batch, mask_batch

# Create generators
train_generator = create_data_generator("processed_data/train")
test_generator = create_data_generator("processed_data/test")

print("Data generators created and ready for model training!")


Processing training data...


100%|██████████| 111/111 [09:25<00:00,  5.09s/it]


Processing testing data...


100%|██████████| 20/20 [03:06<00:00,  9.32s/it]



Data processing complete!
Data generators created and ready for model training!


In [3]:
from tensorflow.keras.layers import Input

# Define the U-Net model architecture
def build_unet_model(input_shape=(512, 512, 1)):
    inputs = Input(input_shape)
    
    # Encoder (Contracting Path)
    conv1 = Conv2D(64, 3, activation='relu', padding='same')(inputs)
    conv1 = Conv2D(64, 3, activation='relu', padding='same')(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    
    conv2 = Conv2D(128, 3, activation='relu', padding='same')(pool1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same')(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    
    conv3 = Conv2D(256, 3, activation='relu', padding='same')(pool2)
    conv3 = Conv2D(256, 3, activation='relu', padding='same')(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    
    # Bridge
    conv4 = Conv2D(512, 3, activation='relu', padding='same')(pool3)
    conv4 = Conv2D(512, 3, activation='relu', padding='same')(conv4)
    
    # Decoder (Expanding Path)
    up5 = concatenate([Conv2DTranspose(256, 2, strides=(2, 2), padding='same')(conv4), conv3], axis=3)
    conv5 = Conv2D(256, 3, activation='relu', padding='same')(up5)
    conv5 = Conv2D(256, 3, activation='relu', padding='same')(conv5)
    
    up6 = concatenate([Conv2DTranspose(128, 2, strides=(2, 2), padding='same')(conv5), conv2], axis=3)
    conv6 = Conv2D(128, 3, activation='relu', padding='same')(up6)
    conv6 = Conv2D(128, 3, activation='relu', padding='same')(conv6)
    
    up7 = concatenate([Conv2DTranspose(64, 2, strides=(2, 2), padding='same')(conv6), conv1], axis=3)
    conv7 = Conv2D(64, 3, activation='relu', padding='same')(up7)
    conv7 = Conv2D(64, 3, activation='relu', padding='same')(conv7)
    
    # Output layer
    outputs = Conv2D(1, 1, activation='sigmoid')(conv7)
    
    model = Model(inputs=inputs, outputs=outputs)
    return model

# Create and compile the model
model = build_unet_model()
model.compile(optimizer='adam',
             loss='binary_crossentropy',
             metrics=['accuracy', tf.keras.metrics.Dice()])

# Define callbacks
callbacks = [
    ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_dice', mode='max'),
    EarlyStopping(monitor='val_dice', patience=10, mode='max'),
    ReduceLROnPlateau(monitor='val_dice', factor=0.1, patience=5, mode='max')
]

# Calculate steps per epoch based on dataset size and batch size
train_steps = len([f for f in os.listdir(os.path.join("processed_data/train", 'ct_scans')) if f.endswith('.npy')]) // 32
val_steps = len([f for f in os.listdir(os.path.join("processed_data/test", 'ct_scans')) if f.endswith('.npy')]) // 32

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=100,
    validation_data=test_generator,
    validation_steps=val_steps,
    callbacks=callbacks
)

# Plot training history
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['dice'], label='Training Dice')
plt.plot(history.history['val_dice'], label='Validation Dice')
plt.title('Model Dice Score')
plt.xlabel('Epoch')
plt.ylabel('Dice Score')
plt.legend()

plt.tight_layout()
plt.show()


NameError: name 'Input' is not defined