In [12]:
import os
import nibabel as nib
import numpy as np
from pathlib import Path
import time
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import gzip
import shutil

# Set watchdog timer for performance monitoring
start_time = time.time()

def compress_nifti(input_file, output_file):
    """Compress NIFTI file using gzip"""
    with open(input_file, 'rb') as f_in:
        with gzip.open(output_file, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    return output_file

def process_nifti_files(input_path, output_path, batch_size=10):
    """
    Process NIFTI files and create compressed batches
    """
    try:
        # Create output directory if it doesn't exist
        os.makedirs(output_path, exist_ok=True)
        
        # Get all .nii and .nii.gz files
        nifti_files = []
        for ext in ('*.nii', '*.nii.gz'):
            nifti_files.extend(Path(input_path).glob(ext))
        
        if not nifti_files:
            print("No NIFTI files found in the input directory")
            return
            
        # Process files in batches
        for batch_num, i in enumerate(range(0, len(nifti_files), batch_size)):
            batch_files = nifti_files[i:i + batch_size]
            batch_dir = os.path.join(output_path, f'batch_{batch_num+1}')
            os.makedirs(batch_dir, exist_ok=True)
            
            for file_path in batch_files:
                try:
                    # Load NIFTI file
                    img = nib.load(str(file_path))
                    data = img.get_fdata()
                    
                    # Downsample the data to reduce size (by factor of 2)
                    data = data[::2, ::2, ::2]
                    
                    # Convert to float16 to reduce memory usage
                    data = data.astype(np.float32)
                    
                    # Normalize data to [0,1]
                    data = (data - data.min()) / (data.max() - data.min())
                    
                    # Create new NIFTI image with processed data
                    new_img = nib.Nifti1Image(data, img.affine)
                    
                    # Save to temporary uncompressed file
                    temp_output = os.path.join(batch_dir, f"temp_{file_path.stem}.nii")
                    nib.save(new_img, temp_output)
                    
                    # Compress the file
                    final_output = os.path.join(batch_dir, f"{file_path.stem}.nii.gz")
                    compress_nifti(temp_output, final_output)
                    
                    # Remove temporary file
                    os.remove(temp_output)
                    
                    # Print file size reduction
                    original_size = os.path.getsize(file_path)
                    compressed_size = os.path.getsize(final_output)
                    reduction = ((original_size - compressed_size) / original_size) * 100
                    print(f"File {file_path.name} size reduced by {reduction:.2f}%")
                    
                except Exception as e:
                    print(f"Error processing {file_path}: {str(e)}")
                    continue
                    
            print(f"Processed batch {batch_num+1}")
    except Exception as e:
        print(f"Error in batch processing: {str(e)}")
    finally:
        print("Processing completed")

# Set input and output paths
input_path = r"C:\Users\alibh\.cache\kagglehub\datasets\javariatahir\litstrain-val\versions\1\LiTS(train_test)\train_CT"
output_path = os.path.join(os.path.dirname(input_path), "compressed_batches")

# Process the files
process_nifti_files(input_path, output_path)

# Performance monitoring
end_time = time.time()
print(f"\nTotal processing time: {end_time - start_time:.2f} seconds")

# Monitor memory usage
import psutil
process = psutil.Process()
memory_info = process.memory_info()
print(f"Memory usage: {memory_info.rss / 1024 / 1024:.2f} MB")

# Set up watchdog for monitoring file changes
class FileHandler(FileSystemEventHandler):
    def on_modified(self, event):
        if not event.is_directory:
            print(f"File {event.src_path} has been modified")

observer = Observer()
event_handler = FileHandler()
observer.schedule(event_handler, output_path, recursive=True)
observer.start()

try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    observer.stop()
finally:
    observer.join()



File volume-0.nii size reduced by 92.68%
File volume-1.nii size reduced by 92.37%
File volume-10.nii size reduced by 90.38%
File volume-100.nii size reduced by 89.55%
File volume-101.nii size reduced by 90.06%
File volume-102.nii size reduced by 89.65%
File volume-103.nii size reduced by 90.52%
File volume-104.nii size reduced by 92.15%
File volume-105.nii size reduced by 90.29%
File volume-106.nii size reduced by 92.12%
Processed batch 1
File volume-107.nii size reduced by 91.93%
File volume-108.nii size reduced by 91.75%
File volume-109.nii size reduced by 91.69%
File volume-11.nii size reduced by 89.58%
File volume-110.nii size reduced by 91.85%
File volume-12.nii size reduced by 90.51%
File volume-13.nii size reduced by 91.13%
File volume-14.nii size reduced by 90.45%
File volume-15.nii size reduced by 90.45%
File volume-16.nii size reduced by 92.31%
Processed batch 2
File volume-17.nii size reduced by 92.02%
File volume-18.nii size reduced by 92.53%
File volume-19.nii size reduced

In [13]:
# Get list of files in memory and storage
def check_files():
    try:
        # Check files in output directory
        print("\nFiles in output directory:")
        for root, dirs, files in os.walk(output_path):
            for file in files:
                file_path = os.path.join(root, file)
                file_size = os.path.getsize(file_path) / (1024 * 1024) # Size in MB
                print(f"File: {file}, Size: {file_size:.2f} MB")
                
        # Check memory usage per file loaded
        print("\nMemory usage breakdown:")
        for obj in gc.get_objects():
            if isinstance(obj, np.ndarray):
                size = obj.nbytes / (1024 * 1024) # Size in MB
                print(f"Array shape: {obj.shape}, Size: {size:.2f} MB")
                
        # Total memory stats
        memory = psutil.virtual_memory()
        print(f"\nTotal memory usage: {memory.percent}%")
        print(f"Available memory: {memory.available/(1024*1024):.2f} MB")
        
    except Exception as e:
        print(f"Error checking files: {str(e)}")

# Set watchdog timer for performance
start = time.time()
check_files()
end = time.time()
print(f"\nTime taken to check files: {end-start:.2f} seconds")



Files in output directory:
File: volume-0.nii.gz, Size: 2.75 MB
File: volume-1.nii.gz, Size: 4.69 MB
File: volume-10.nii.gz, Size: 24.09 MB
File: volume-100.nii.gz, Size: 35.79 MB
File: volume-101.nii.gz, Size: 33.94 MB
File: volume-102.nii.gz, Size: 35.02 MB
File: volume-103.nii.gz, Size: 32.39 MB
File: volume-104.nii.gz, Size: 30.65 MB
File: volume-105.nii.gz, Size: 47.87 MB
File: volume-106.nii.gz, Size: 30.39 MB
File: volume-80.nii.gz, Size: 9.62 MB
File: volume-81.nii.gz, Size: 13.64 MB
File: volume-82.nii.gz, Size: 21.20 MB
File: volume-83.nii.gz, Size: 42.35 MB
File: volume-84.nii.gz, Size: 37.73 MB
File: volume-85.nii.gz, Size: 33.03 MB
File: volume-86.nii.gz, Size: 33.79 MB
File: volume-87.nii.gz, Size: 36.68 MB
File: volume-88.nii.gz, Size: 33.61 MB
File: volume-89.nii.gz, Size: 32.16 MB
File: volume-9.nii.gz, Size: 25.85 MB
File: volume-90.nii.gz, Size: 31.02 MB
File: volume-91.nii.gz, Size: 30.34 MB
File: volume-92.nii.gz, Size: 34.97 MB
File: volume-93.nii.gz, Size: 29.17

In [19]:
# Import required libraries
import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import time
import termcolor as tc

# Set watchdog timer
start_time = time.time()

# Define model architecture for liver/tumor detection
def create_model(input_shape=(256, 256, 3)):
    model = models.Sequential([
        layers.Conv2D(32, 3, activation='relu', padding='same', input_shape=input_shape),
        layers.MaxPooling2D(),
        layers.Conv2D(64, 3, activation='relu', padding='same'),
        layers.MaxPooling2D(),
        layers.Conv2D(128, 3, activation='relu', padding='same'),
        layers.MaxPooling2D(),
        layers.Conv2D(256, 3, activation='relu', padding='same'),
        layers.UpSampling2D(),
        layers.Conv2D(128, 3, activation='relu', padding='same'),
        layers.UpSampling2D(),
        layers.Conv2D(64, 3, activation='relu', padding='same'),
        layers.UpSampling2D(),
        layers.Conv2D(32, 3, activation='relu', padding='same'),
        layers.Conv2D(1, 1, activation='sigmoid')
    ])
    return model

# Load and preprocess data in batches
def load_data_batch(batch_size=32):
    # Assuming data is stored in output_path
    images = []
    masks = []
    
    for root, _, files in os.walk(output_path):
        image_files = [f for f in files if f.endswith('.npy') and 'mask' not in f]
        mask_files = [f for f in files if f.endswith('.npy') and 'mask' in f]
        
        for i in range(0, len(image_files), batch_size):
            batch_images = image_files[i:i + batch_size]
            batch_masks = mask_files[i:i + batch_size]
            
            for img_file, mask_file in zip(batch_images, batch_masks):
                try:
                    img = np.load(os.path.join(root, img_file))
                    mask = np.load(os.path.join(root, mask_file))
                    
                    # Resize to standard size
                    img = cv2.resize(img, (256, 256))
                    mask = cv2.resize(mask, (256, 256))
                    
                    images.append(img)
                    masks.append(mask)
                except Exception as e:
                    print(f"Error loading file {img_file}: {str(e)}")
                    continue
                    
            yield np.array(images), np.array(masks)
            images = []
            masks = []

# Initialize model
model = create_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training parameters
BATCH_SIZE = 32
EPOCHS = 10

# Train model with batch generator
try:
    for epoch in range(EPOCHS):
        print(f"\nEpoch {epoch+1}/{EPOCHS}")
        batch_generator = load_data_batch(BATCH_SIZE)
        
        for batch_idx, (X_batch, y_batch) in enumerate(batch_generator):
            if len(X_batch) == 0:
                continue
                
            # Monitor memory usage
            memory = psutil.virtual_memory()
            if memory.percent > 90:
                print("Warning: High memory usage detected!")
                gc.collect()
            
            history = model.fit(X_batch, y_batch, batch_size=BATCH_SIZE, verbose=1)
            
            if batch_idx % 10 == 0:
                print(f"Batch {batch_idx} - Loss: {history.history['loss'][0]:.4f}")

except Exception as e:
    print(f"Training error: {str(e)}")

# Function to detect and visualize tumors
def detect_tumors(image):
    processed_img = cv2.resize(image, (256, 256))
    prediction = model.predict(np.expand_dims(processed_img, 0))[0]
    
    # Draw bounding boxes for detected tumors
    threshold = 0.5
    binary_mask = (prediction > threshold).astype(np.uint8)
    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    result_img = image.copy()
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(result_img, (x, y), (x+w, y+h), (0, 0, 255), 2)  # Red box
    
    return result_img

# Test the model on a sample image
try:
    sample_files = os.listdir(output_path)[:5]  # Get first 5 images
    for file in sample_files:
        if file.endswith('.npy') and 'mask' not in file:
            img = np.load(os.path.join(output_path, file))
            result = detect_tumors(img)
            
            plt.figure(figsize=(10, 5))
            plt.subplot(1, 2, 1)
            plt.imshow(img)
            plt.title('Original Image')
            plt.subplot(1, 2, 2)
            plt.imshow(result)
            plt.title('Detected Tumors')
            plt.show()

except Exception as e:
    print(f"Error in tumor detection: {str(e)}")

# Performance metrics
end_time = time.time()
print(f"\nTotal execution time: {end_time - start_time:.2f} seconds")
print(f"Final memory usage: {psutil.virtual_memory().percent}%")
print(f"Available memory: {psutil.virtual_memory().available/(1024*1024):.2f} MB")


ModuleNotFoundError: No module named 'termcolor'

In [15]:
!pip install termcolor


Defaulting to user installation because normal site-packages is not writeable
