This is the pre-transformation notebook.

In [None]:
import pydicom
import pandas as pd
import torch
from PIL import Image, ImageDraw
import cv2

# Function to create a bounding box around (x, y) coordinates
def create_bounding_box(image, x, y, box_size=50):
    """
    Draw a red square box centred on (x, y) and return the annotated image.

    The box is clipped to the image extent, so the returned corners are always
    valid pixel coordinates even when (x, y) lies close to (or beyond) an edge.

    Args:
        image: PIL image. It is converted to RGB before drawing.
        x, y: Centre of the box in pixel coordinates.
        box_size: Nominal side length in pixels; ``box_size // 2`` pixels are
            taken on each side of the centre.

    Returns:
        tuple: (RGB image with the box drawn, left, top, right, bottom).
    """
    image = image.convert('RGB')  # Drawing in colour needs an RGB canvas
    draw = ImageDraw.Draw(image)

    half_box = box_size // 2
    width, height = image.size

    # Clip every corner into [0, width] x [0, height]; clamping both ends of each
    # axis also guarantees left <= right and top <= bottom for the draw call.
    left = min(max(x - half_box, 0), width)
    top = min(max(y - half_box, 0), height)
    right = max(min(x + half_box, width), 0)
    bottom = max(min(y + half_box, height), 0)

    # Draw the bounding box (in red color)
    draw.rectangle([left, top, right, bottom], outline="red", width=3)

    return image, left, top, right, bottom

# Function to process all images and update the bounding boxes
def _load_dicom_as_rgb_image(image_path):
    """Read a DICOM file and return its pixels as an 8-bit RGB PIL image."""
    pixels = pydicom.dcmread(image_path).pixel_array.astype(float)

    # Scale to 0..255. An all-zero slice is left as zeros instead of dividing by zero.
    peak = pixels.max()
    if peak > 0:
        pixels = pixels / peak * 255
    pixels = pixels.astype('uint8')

    # Convert grayscale to RGB if necessary
    if len(pixels.shape) == 2:
        pixels = cv2.cvtColor(pixels, cv2.COLOR_GRAY2RGB)

    return Image.fromarray(pixels)


def process_images_and_create_boxes(df, box_size=50):
    """
    Compute a bounding box for every row of ``df`` that has x/y coordinates.

    For each row the DICOM at ``row['image_path']`` is loaded, a box is drawn
    around (``row['x']``, ``row['y']``) and the corners are written to the
    ``x_min``, ``y_min``, ``x_max`` and ``y_max`` columns of ``df``. Rows with
    missing coordinates, and rows whose image cannot be read, are reported and
    skipped. The annotated image is saved to disk only for index 0.

    Args:
        df (pd.DataFrame): Must provide 'image_path', 'x' and 'y' columns.
        box_size (int): Side length forwarded to ``create_bounding_box``.

    Returns:
        pd.DataFrame: The same ``df`` object, updated in place.
    """
    for index, row in df.iterrows():
        # Skip rows with missing coordinates
        if pd.isna(row['x']) or pd.isna(row['y']):
            print(f"Skipping index {index} due to missing x/y coordinates")
            continue

        x = row['x']
        y = row['y']

        # Best effort: a single unreadable file must not abort the whole pass.
        try:
            image = _load_dicom_as_rgb_image(row['image_path'])
        except Exception as e:
            print(f"Error reading DICOM image at index {index}: {e}")
            continue  # Skip invalid images

        # Create a bounding box around the (x, y) coordinates
        image_with_box, left, top, right, bottom = create_bounding_box(image, x, y, box_size)

        # Update DataFrame with bounding box coordinates
        df.at[index, 'x_min'] = left
        df.at[index, 'y_min'] = top
        df.at[index, 'x_max'] = right
        df.at[index, 'y_max'] = bottom

        # Optionally: Save image with box if needed (example saving the first image)
        if index == 0:  # You can change this to save multiple or all images
            image_with_box.save(f"image_with_box_{index}.png")

        print(f"Updated bounding box for index {index}: x_min={left}, y_min={top}, x_max={right}, y_max={bottom}")

    return df

# NOTE: train_df must already exist in the kernel (it is not loaded in this cell).
# Add the x_min / y_min / x_max / y_max columns to it.
train_df = process_images_and_create_boxes(train_df)

# Persist the augmented table next to the notebook
train_df.to_csv('train_with_boxes.csv', index=False)

# Show the first few computed boxes
bbox_preview = train_df.loc[:, ['x_min', 'y_min', 'x_max', 'y_max']].head()
print(bbox_preview)


In [None]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.models as models
from torchvision import transforms
import pydicom
import cv2
import pandas as pd
import numpy as np
import random
import mlflow
import mlflow.pytorch
import os
import matplotlib.pyplot as plt

# Seed torch, numpy and the stdlib RNG (in that order) so runs are repeatable
seed = 42
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(seed)

# Select (or create) the MLflow experiment that collects these runs
Resnet50_class_seg1_seg1_basic = "Resnet50_class_seg1_seg1_basic"
mlflow.set_experiment(Resnet50_class_seg1_seg1_basic)

# Preprocessing + light augmentation, ending with ImageNet normalisation
_transform_steps = [
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.RandomRotation(4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_transform_steps)

class MRIDataset(Dataset):
    """
    Dataset of DICOM slices described by a DataFrame.

    Each row must provide 'image_path', 'x', 'y' and 'severity'. Depending on
    ``mode`` a sample is either the whole image with a box target
    ('localization') or the crop around (x, y) with the severity label
    ('classification').
    """

    _MODES = ('localization', 'classification')

    def __init__(self, data, transform=None, mode='classification'):
        """
        Args:
            data (pd.DataFrame): Data containing image paths and corresponding labels.
            transform (callable, optional): Optional transformation to apply to images.
            mode (str): Mode can be 'localization' or 'classification'.

        Raises:
            ValueError: If ``mode`` is not one of the supported modes (previously
                such a dataset silently returned ``None`` for every sample).
        """
        if mode not in self._MODES:
            raise ValueError(f"mode must be one of {self._MODES}, got {mode!r}")
        self.data = data
        self.transform = transform
        self.mode = mode

    def __len__(self):
        # Return the number of samples in the dataset (rows of the DataFrame)
        return len(self.data)

    def create_bounding_box(self, x, y, image_shape):
        """
        Create a bounding box of +/- 20 pixels around (x, y), clipped to the image.

        Args:
            x, y: Centre of the box in pixel coordinates.
            image_shape: Array shape with rows first, i.e. (height, width, ...).

        Returns:
            tuple: (x_min, y_min, x_max, y_max) as ints.
        """
        margin = 20  # Half side length of the box
        x_min = max(0, int(x - margin))
        y_min = max(0, int(y - margin))
        x_max = min(image_shape[1], int(x + margin))
        y_max = min(image_shape[0], int(y + margin))
        return x_min, y_min, x_max, y_max

    def _to_tensor(self, array):
        """Apply ``self.transform`` if set, otherwise convert the HWC array to a CHW tensor."""
        if self.transform:
            return self.transform(array)
        return torch.from_numpy(array).permute(2, 0, 1)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index of the sample to retrieve.

        Returns:
            tuple: (image_tensor, label) where label is either a class label (for classification)
                   or box coordinates (for localization).
        """
        row = self.data.iloc[index]
        x, y = row['x'], row['y']

        image = pydicom.dcmread(row['image_path']).pixel_array.astype(float)
        # Scale to 0..255; an all-zero slice stays zero instead of dividing by zero.
        peak = image.max()
        if peak > 0:
            image = image / peak * 255
        image = image.astype('uint8')

        # Convert grayscale to RGB if necessary
        if len(image.shape) == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        x_min, y_min, x_max, y_max = self.create_bounding_box(x, y, image.shape)

        if self.mode == 'localization':
            # NOTE(review): the box is in original-image pixels while the transform
            # may resize the image - confirm this is the intended target.
            bounding_box = torch.tensor([x_min, y_min, x_max, y_max], dtype=torch.float32)
            return self._to_tensor(image), bounding_box

        # Classification: only the crop is transformed. The full image is no longer
        # pushed through the transform just to be thrown away.
        cropped_image = image[y_min:y_max, x_min:x_max]
        # Label must be a long tensor for CrossEntropyLoss
        return self._to_tensor(cropped_image), torch.tensor(row['severity']).long()


# Ensure the data only contains .dcm files.
# na=False treats a missing image_path as "not a .dcm file"; without it the mask
# contains NaN and the boolean indexing raises.
train_data = train_data[train_data['image_path'].str.endswith('.dcm', na=False)]

# Create the dataset
dataset = MRIDataset(data=train_data, transform=transform)

from sklearn.model_selection import train_test_split

# Split positional indices into training and validation sets, stratified by severity
train_indices, val_indices = train_test_split(
    list(range(len(dataset))), test_size=0.2, random_state=42, stratify=dataset.data['severity']
)

# Create subsets based on the indices
# NOTE(review): both subsets share `dataset`, so the validation samples also go
# through the random augmentations in `transform` - confirm this is intended.
train_dataset = torch.utils.data.Subset(dataset, train_indices)
val_dataset = torch.utils.data.Subset(dataset, val_indices)
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Load ResNet-50 and set up for classification
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = train_data['severity'].nunique()

model = models.resnet50(weights='IMAGENET1K_V1')
# Replace the ImageNet head with one sized for the severity classes
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

# Define the loss function and optimizer
learning_rate = 0.0001
criterion_cel = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
num_epochs = 20

# Early stopping parameters
diverge_count = 0              # Counter for divergence-based stopping
stop_threshold = 0.3           # Threshold for divergence
max_diverge_count = 3          # Max number of epochs with diverging validation loss
patience = 5  # Number of epochs to wait for improvement before early stopping
 

# Start MLflow run
# Everything below runs inside one MLflow run: hyper-parameters are logged once,
# then each epoch logs its train/validation loss and a weights checkpoint.
with mlflow.start_run():
    # Log parameters
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("optimizer", "Adam")
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("num_epochs", num_epochs)
    mlflow.log_param("num_classes", num_classes)
    mlflow.log_param("model_architecture", "ResNet-50")
    
    # Log early stopping parameters
    mlflow.log_param("stop_threshold", stop_threshold)
    mlflow.log_param("max_diverge_count", max_diverge_count)
    mlflow.log_param("patience", patience)
    
    # Set descriptive tags for the model
    mlflow.set_tag("model_description", "ResNet-50 for classification using MRI images")

    best_val_loss = float('inf')
    patience_counter = 0
    # NOTE(review): example_input is not referenced again in this cell.
    example_input = torch.randn(1, 3, 224, 224)  # Batch size 1, 3 color channels, 224x224 image
    train_losses_cel = []  # List to store training losses
    val_losses_cel = []  # List to store validation losses
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss_cel_train = 0.0

        for images, labels in train_loader: 
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            
            outputs = model(images)
            
            # Cross Entropy Loss
            loss_cel = criterion_cel(outputs, labels)
            running_loss_cel_train += loss_cel.item()
            
            # Backward pass and optimization
            loss_cel.backward()
            optimizer.step()

        # Calculate average training loss for the epoch
        # (mean of the per-batch losses, so every batch has equal weight)
        epoch_loss_cel_train = running_loss_cel_train / len(train_loader)
        train_losses_cel.append(epoch_loss_cel_train)

        # Log training loss to MLflow
        mlflow.log_metric("train_loss_cel", epoch_loss_cel_train, step=epoch)

        # Validation phase
        model.eval()
        running_loss_cel_val = 0.0

        # No gradients are needed while scoring the validation set
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                
                # Cross Entropy Loss for validation
                loss_cel = criterion_cel(outputs, labels)
                running_loss_cel_val += loss_cel.item()

        # Calculate validation loss for the epoch
        epoch_loss_cel_val = running_loss_cel_val / len(val_loader)
        val_losses_cel.append(epoch_loss_cel_val)

        # Log validation loss to MLflow
        mlflow.log_metric("val_loss_cel", epoch_loss_cel_val, step=epoch)

        print(f'Epoch [{epoch + 1}/{num_epochs}], '
              f'Train Cross Entropy Loss: {epoch_loss_cel_train:.4f}, '
              f'Validation Cross Entropy Loss: {epoch_loss_cel_val:.4f}')

        # Save weights for each epoch
        # The file is written locally only so it can be uploaded as an artifact.
        epoch_weight_path = f"model_weights_epoch_{epoch + 1}.pt"
        torch.save(model.state_dict(), epoch_weight_path)
        mlflow.log_artifact(epoch_weight_path)  # Log model weights as artifact for each epoch
        os.remove(epoch_weight_path)  # Optionally, delete the local file after logging

        # Check for early stopping (divergence-based stopping)
        # "Diverging" = validation loss exceeds training loss by more than
        # stop_threshold (relative); stop after max_diverge_count such epochs in a row.
        if epoch_loss_cel_val > epoch_loss_cel_train * (1 + stop_threshold):
            diverge_count += 1
            if diverge_count >= max_diverge_count:
                print(f"Early stopping at epoch {epoch + 1} due to divergence.")
                break
        else:
            diverge_count = 0

        # Check for early stopping (patience-based stopping)
        # Stop once `patience` consecutive epochs fail to beat the best validation loss.
        if epoch_loss_cel_val < best_val_loss:
            best_val_loss = epoch_loss_cel_val
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch + 1} due to no improvement in validation loss.")
                break

    # Final model log
    # This logs the model as it is after the last executed epoch (not the best epoch).
    mlflow.pytorch.log_model(model, "model")


In [None]:
# Smoke-test `transform` on a single DICOM slice.
# PIL cannot decode DICOM files, and `transform` starts with ToPILImage (which
# expects an ndarray/tensor), so read the pixels with pydicom and prepare them
# the same way MRIDataset does before applying the transform.
image = pydicom.dcmread('path_to_your_image.dcm').pixel_array.astype(float)
image = (image / image.max() * 255).astype('uint8')  # Normalize to 0..255
if len(image.shape) == 2:  # Grayscale image
    image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
image = transform(image)
print(image.shape)  # Expected output: torch.Size([3, 224, 224])

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.models as models
from torchvision import transforms
import pydicom
import cv2
import pandas as pd
import numpy as np
import random
import mlflow
import mlflow.pytorch

# Make the run repeatable: seed torch, then numpy, then the stdlib RNG
seed = 42
for _seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    _seed_rng(seed)

# Route all runs of this cell to one named MLflow experiment
Resnet50_class_seg1_seg1_basic = "Resnet50_class_seg1_seg1_basic"
mlflow.set_experiment(Resnet50_class_seg1_seg1_basic)

# Resize, lightly augment, then normalise with the ImageNet statistics
_transform_steps = [
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.RandomRotation(4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_transform_steps)

# Define the Dataset class
class MRIDataset(Dataset):
    """
    Classification dataset of DICOM slices.

    Each row of ``data`` must provide 'image_path' and 'severity'. Samples are
    returned as (image_tensor, severity) with severity as a long tensor.
    """

    def __init__(self, data, transform=None):
        """
        Args:
            data (pd.DataFrame): Table with 'image_path' and 'severity' columns.
            transform (callable, optional): Applied to the HxWx3 uint8 image.
        """
        # Work on a private frame with integer severity instead of writing the
        # cast back into the caller's (possibly sliced) DataFrame.
        self.data = data.assign(severity=data['severity'].astype(int))
        self.transform = transform

    def __getitem__(self, index):
        """Load, normalise and transform the image at ``index``; return (tensor, label)."""
        row = self.data.iloc[index]
        label = row['severity']  # Use severity for the label

        image = pydicom.dcmread(row['image_path']).pixel_array.astype(float)
        # Scale to 0..255; an all-zero slice stays zero instead of dividing by zero.
        peak = image.max()
        if peak > 0:
            image = image / peak * 255
        image = image.astype('uint8')

        # Convert grayscale to RGB if necessary
        if len(image.shape) == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        image_tensor = self.transform(image) if self.transform else torch.from_numpy(image).permute(2, 0, 1)
        return image_tensor, torch.tensor(label).long()

    def __len__(self):
        return len(self.data)

# Ensure the data only contains .dcm files.
# na=False treats a missing image_path as "not a .dcm file"; without it the mask
# contains NaN and the boolean indexing raises.
train_data = train_data[train_data['image_path'].str.endswith('.dcm', na=False)]

# Create the dataset
dataset = MRIDataset(data=train_data, transform=transform)

# Split the dataset into training and validation sets (80% train, 20% validation)
# NOTE(review): both splits share `dataset`, so validation samples are also
# randomly augmented by `transform` - confirm this is intended.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Load ResNet-50 and set up for classification
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = train_data['severity'].nunique()

model = models.resnet50(weights='IMAGENET1K_V1')
# Replace the ImageNet head with one sized for the severity classes
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

# Define the loss function and optimizer
learning_rate = 0.0001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
num_epochs = 20

# Early stopping parameters
diverge_count = 0              # Counter for divergence-based stopping
stop_threshold = 0.3           # Threshold for divergence
max_diverge_count = 3          # Max number of epochs with diverging validation loss
patience = 5  # Number of epochs to wait for improvement before early stopping

# Per-epoch loss history (used for plotting after training)
train_losses = []
val_losses = []

# Start MLflow run
with mlflow.start_run():
    # Log parameters (the actual values, not the variable names as strings)
    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("optimizer", "Adam")
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("num_epochs", num_epochs)
    mlflow.log_param("num_classes", num_classes)
    mlflow.log_param("model_architecture", "ResNet-50")

    # Log early stopping parameters
    mlflow.log_param("stop_threshold", stop_threshold)
    mlflow.log_param("max_diverge_count", max_diverge_count)
    mlflow.log_param("patience", patience)

    # Set descriptive tags for the model
    mlflow.set_tag("model_description", "ResNet-50 for classification using MRI images")

    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss_train = 0.0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss_train += loss.item()

            loss.backward()
            optimizer.step()

        # Mean of the per-batch losses for this epoch
        epoch_loss_train = running_loss_train / len(train_loader)
        train_losses.append(epoch_loss_train)

        # Log training loss to MLflow
        mlflow.log_metric("train_loss", epoch_loss_train, step=epoch)

        # Validation phase
        model.eval()
        running_loss_val = 0.0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                running_loss_val += loss.item()

        epoch_loss_val = running_loss_val / len(val_loader)
        val_losses.append(epoch_loss_val)

        # Log validation loss to MLflow
        mlflow.log_metric("val_loss", epoch_loss_val, step=epoch)

        # Early stopping
        if epoch_loss_val < best_val_loss:
            best_val_loss = epoch_loss_val
            patience_counter = 0  # Reset patience counter
        else:
            patience_counter += 1

        # Use the configured (and logged) patience rather than a hard-coded 5
        if patience_counter >= patience:
            print("Early stopping due to no improvement in validation loss.")
            break

    # Log the trained model while the run is still active, so it is stored with
    # the parameters and metrics above instead of in a separate implicit run.
    mlflow.pytorch.log_model(model, "model")

# Optionally, plot the training/validation losses
import matplotlib.pyplot as plt
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.legend()
plt.show()


In [None]:
# define transformation and class

# Imports come first so the seeding below does not depend on names left in the
# kernel by earlier cells (torch and numpy were previously used before import).
import random

import cv2
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import pydicom
import torch
import torchvision.models as models
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
# %pip install torch torchvision
# %pip install opencv-python

# Set random seed for reproducibility
seed = 42  # You can choose any integer
torch.manual_seed(seed)
np.random.seed(seed)  # Set seed for numpy
random.seed(seed)  # Set seed for random module

# Create a new experiment
Resnet50_class_seg1_seg1_basic = "Resnet50_class_seg1_seg1_basic"
mlflow.set_experiment(Resnet50_class_seg1_seg1_basic)

# Define the transform with augmentation (the same pipeline as used earlier in this notebook)
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    #transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(4),       # Randomly rotate the image by up to ±4 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust color properties
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])



class MRIDataset(Dataset):
    """
    Classification dataset of DICOM slices.

    Each row of ``data`` must provide 'image_path' and 'severity'. Samples are
    returned as (image_tensor, severity) with severity as a long tensor.
    """

    def __init__(self, data, transform=None):
        """
        Args:
            data (pd.DataFrame): Table with 'image_path' and 'severity' columns.
            transform (callable, optional): Applied to the HxWx3 uint8 image.
        """
        self.transform = transform

        # Ensure severity is in integer format. The cast is stored on a private
        # frame so the caller's (possibly sliced) DataFrame is not modified.
        self.data = data.assign(severity=data['severity'].astype(int))

    def __getitem__(self, index):
        """Load, normalise and transform the image at ``index``; return (tensor, label)."""
        row = self.data.iloc[index]
        label = row['severity']  # Use severity for the label

        image = pydicom.dcmread(row['image_path']).pixel_array.astype(float)
        # Scale to 0..255; an all-zero slice stays zero instead of dividing by zero.
        peak = image.max()
        if peak > 0:
            image = image / peak * 255
        image = image.astype('uint8')

        # Convert the image to RGB if it is grayscale
        if len(image.shape) == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        # Apply transformations including augmentation
        image_tensor = self.transform(image) if self.transform else torch.from_numpy(image).permute(2, 0, 1)

        return image_tensor, torch.tensor(label).long()  # Return label as tensor

    def __len__(self):
        return len(self.data)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import models
# %pip install keras
# %pip install tensorflow
# from tensorflow.keras.callbacks import EarlyStopping  # Import EarlyStopping
import numpy as np  # Import numpy for setting the random seed

 

# Ensure the data only contains .dcm files.
# na=False treats a missing image_path as "not a .dcm file"; without it the mask
# contains NaN and the boolean indexing raises.
train_data = train_data[train_data['image_path'].str.endswith('.dcm', na=False)]

# Create the dataset
dataset = MRIDataset(data=train_data, transform=transform)

# Split the dataset into training and validation sets (80% train, 20% validation)
# NOTE(review): both splits share `dataset`, so validation samples are also
# randomly augmented by `transform` - confirm this is intended.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

batch_size = 16
# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Load ResNet-50 and set up for classification
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Count the classes in the frame the dataset is actually built from
# (this previously read train_df, which is a different table).
num_classes = train_data['severity'].nunique()

model = models.resnet50(weights='IMAGENET1K_V1')
# Replace the ImageNet head with one sized for the severity classes
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

# Define loss functions and optimizer
learning_rate = 0.0001
learining_rate = learning_rate  # legacy misspelled name, kept for any cell still using it
criterion_cel = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
num_epochs = 20

# Lists to store loss values for plotting
train_losses_cel = []
val_losses_cel = []

# Early stopping parameters
diverge_count = 0              # Counter for divergence-based stopping
stop_threshold = 0.3           # Threshold for divergence
max_diverge_count = 3          # Max number of epochs with diverging validation loss
patience = 5  # Number of epochs to wait for improvement before early stopping


# Number of top-level child modules of the model
num_layers = len(list(model.children()))
import torch
import numpy as np
import os
import mlflow
import matplotlib.pyplot as plt

# Start MLflow run
with mlflow.start_run():
    # Log parameters. The learning rate and batch size are read from the objects
    # that actually use them (previously the literal strings 'learning_rate' and
    # 'batch_size' were logged instead of the values).
    mlflow.log_param("learning_rate", optimizer.param_groups[0]["lr"])
    mlflow.log_param("optimizer", "Adam")
    mlflow.log_param("batch_size", train_loader.batch_size)
    mlflow.log_param("num_epochs", num_epochs)
    mlflow.log_param("num_classes", num_classes)
    mlflow.log_param("model_architecture", "ResNet-50")

    # Log early stopping parameters
    mlflow.log_param("stop_threshold", stop_threshold)
    mlflow.log_param("max_diverge_count", max_diverge_count)
    mlflow.log_param("patience", patience)

    # Set descriptive tags for the model
    mlflow.set_tag("model_description", "ResNet-50 for 3 cat and Sagittal T2/STIR and Sagittal T1 images")

    # Example input tensor with the same shape as the model's expected input
    example_input = torch.randn(1, 3, 224, 224)  # Batch size 1, 3 color channels, 224x224 image
    best_val_loss = float('inf')  # Initialize best validation loss
    patience_counter = 0  # Plateau counter

    diverge_count = 0  # Counter for divergence-based stopping

    train_losses_cel = []  # List to store training losses
    val_losses_cel = []  # List to store validation losses

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss_cel_train = 0.0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()

            outputs = model(images)

            # Cross Entropy Loss
            loss_cel = criterion_cel(outputs, labels)
            running_loss_cel_train += loss_cel.item()

            # Backward pass and optimization
            loss_cel.backward()
            optimizer.step()

        # Calculate average training loss for the epoch (mean of per-batch losses)
        epoch_loss_cel_train = running_loss_cel_train / len(train_loader)
        train_losses_cel.append(epoch_loss_cel_train)

        # Log training loss to MLflow
        mlflow.log_metric("train_loss_cel", epoch_loss_cel_train, step=epoch)

        # Validation phase
        model.eval()
        running_loss_cel_val = 0.0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)

                # Cross Entropy Loss for validation
                loss_cel = criterion_cel(outputs, labels)
                running_loss_cel_val += loss_cel.item()

        # Calculate validation loss for the epoch
        epoch_loss_cel_val = running_loss_cel_val / len(val_loader)
        val_losses_cel.append(epoch_loss_cel_val)

        # Log validation loss to MLflow
        mlflow.log_metric("val_loss_cel", epoch_loss_cel_val, step=epoch)

        print(f'Epoch [{epoch + 1}/{num_epochs}], '
              f'Train Cross Entropy Loss: {epoch_loss_cel_train:.4f}, '
              f'Validation Cross Entropy Loss: {epoch_loss_cel_val:.4f}')

        # Save weights for each epoch (written locally only to upload as an artifact)
        epoch_weight_path = f"model_weights_epoch_{epoch + 1}.pt"
        torch.save(model.state_dict(), epoch_weight_path)
        mlflow.log_artifact(epoch_weight_path)  # Log model weights as artifact for each epoch
        os.remove(epoch_weight_path)  # Optionally, delete the local file after logging

        # Check for early stopping (divergence-based stopping): validation loss
        # exceeding training loss by more than stop_threshold (relative) for
        # max_diverge_count consecutive epochs.
        if epoch_loss_cel_val > epoch_loss_cel_train * (1 + stop_threshold):
            diverge_count += 1
            if diverge_count >= max_diverge_count:
                print(f"Early stopping at epoch {epoch + 1} due to validation loss diverging.")
                break
        else:
            diverge_count = 0  # Reset diverge count if validation loss is not diverging

        # Check for early stopping (plateau-based stopping)
        if epoch_loss_cel_val < best_val_loss:
            best_val_loss = epoch_loss_cel_val
            patience_counter = 0  # Reset plateau counter if validation loss improves
            # Save the model weights when validation loss improves (best model)
            best_model_weight_path = f"best_model_weights_epoch_{epoch + 1}.pt"
            torch.save(model.state_dict(), best_model_weight_path)
            mlflow.log_artifact(best_model_weight_path)  # Log the best model weights
            os.remove(best_model_weight_path)  # Optionally, delete the local file after logging
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch + 1} due to lack of validation loss improvement.")
                break

    print("Training complete!")

    # Log final model (state after the last executed epoch) with an input example
    example_input_np = example_input.numpy()
    mlflow.pytorch.log_model(model, "final_model", input_example=example_input_np)

    # Plot and log the loss curves as artifacts
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses_cel, label='Train Cross Entropy Loss')
    plt.plot(val_losses_cel, label='Validation Cross Entropy Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Cross Entropy Loss')
    plt.legend()
    plt.savefig("cross_entropy_loss.png")
    mlflow.log_artifact("cross_entropy_loss.png")


In [None]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from torchvision import transforms
import pydicom
import cv2
import pandas as pd

class MRIDataset(Dataset):
    """
    Dataset of DICOM slices with a point annotation per row.

    Each row of ``data`` must provide 'image_path', 'x' and 'y' (plus
    'condition' in classification mode).

    * 'localization': the sample is the box around (x, y), resized to 224x224,
      and the label is the point itself.
    * any other mode: the sample is the full image with a filled circle drawn
      at (x, y), resized to 224x224; in 'classification' mode the label is
      ``condition``.

    Images are always returned as 3-channel CHW tensors.
    """

    def __init__(self, data, transform=None, mode='classification', box_size=50):
        """
        Args:
            data (pd.DataFrame): One row per image.
            transform (callable, optional): Applied to the resized uint8 array.
            mode (str): 'classification' or 'localization'.
            box_size (int): Side length in pixels of the localization box.
        """
        self.data = data
        self.transform = transform
        self.mode = mode
        self.box_size = box_size

    def create_bounding_box(self, x, y, img_shape):
        """Return (x_min, y_min, x_max, y_max) of the box around (x, y), clipped to ``img_shape`` (rows, cols)."""
        x_min = max(0, int(x - self.box_size / 2))
        y_min = max(0, int(y - self.box_size / 2))
        x_max = min(img_shape[1], int(x + self.box_size / 2))
        y_max = min(img_shape[0], int(y + self.box_size / 2))
        return x_min, y_min, x_max, y_max

    def __getitem__(self, index):
        """Return (image_tensor, label) for the row at position ``index``."""
        row = self.data.iloc[index]
        x, y = row['x'], row['y']

        # Labels are returned as tensors so that a batch collates into a single
        # tensor that can be moved to the device.
        if self.mode == 'classification':
            label = torch.tensor(int(row['condition']), dtype=torch.long)
        else:
            label = torch.tensor([x, y], dtype=torch.float32)

        image = pydicom.dcmread(row['image_path']).pixel_array.astype(float)
        # Scale to 0..255; an all-zero slice stays zero instead of dividing by zero.
        peak = image.max()
        if peak > 0:
            image = image / peak * 255
        image = image.astype('uint8')

        if self.mode == 'localization':
            x_min, y_min, x_max, y_max = self.create_bounding_box(x, y, image.shape)
            view = image[y_min:y_max, x_min:x_max]
        else:
            # Mark the annotated point on a copy so the source pixels stay intact
            view = image.copy()
            cv2.circle(view, (int(x), int(y)), 10, (255, 0, 0), -1)
        view = cv2.resize(view, (224, 224))
        image_tensor = self.transform(view) if self.transform else view

        # Bring whatever came back into CHW tensor form
        if not torch.is_tensor(image_tensor):
            image_tensor = torch.from_numpy(image_tensor)
            if image_tensor.ndim == 3:
                image_tensor = image_tensor.permute(2, 0, 1)  # HWC -> CHW
        if image_tensor.ndim == 2:
            image_tensor = image_tensor.unsqueeze(0)  # Add channel dimension

        # Duplicate to 3 channels only when the image is single-channel
        if image_tensor.shape[0] == 1:
            image_tensor = image_tensor.repeat(3, 1, 1)

        return image_tensor, label

    def __len__(self):
        return len(self.data)


# Transformations for the images (single-channel normalisation; the dataset
# replicates the channel to three afterwards)
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485], std=[0.229])
])

# Task selection. It is defined before the dataset so that the dataset, the
# model head and the loss all follow the same setting.
mode = 'classification'  # my goal is to classify the images
num_classes = 3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Sample data for one person
data = pd.DataFrame({
    'image_path': ['data/train_images_origin/1028909382/1477339972/24.dcm'],
    'x': [324.88], 'y': [485.87], 'condition': [1]
})
dataset = MRIDataset(data=data, transform=transform, mode=mode)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

# Load ResNet-50 with ImageNet weights. `weights=` replaces the deprecated
# `pretrained=True` and matches the other cells of this notebook.
model = models.resnet50(weights='IMAGENET1K_V1')
if mode == 'classification':
    model.fc = nn.Linear(model.fc.in_features, num_classes)
else:
    # NOTE(review): the regression head has 4 outputs - confirm that this matches
    # the shape of the labels the dataset yields in localization mode.
    model.fc = nn.Linear(model.fc.in_features, 4)

model.to(device)


criterion = nn.CrossEntropyLoss() if mode == 'classification' else nn.SmoothL1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

for epoch in range(num_epochs):
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)

        # The criterion was already chosen per mode, so one call covers both tasks
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

    # Reports the loss of the last batch of the epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


In [None]:
def keep_persons(dataframes, study_ids_to_keep, all_studies=False):
    """
    Restrict each DataFrame to the rows of the requested studies.

    Args:
        dataframes: List of DataFrames that each have a 'study_id' column.
        study_ids_to_keep: Study ids whose rows are retained.
        all_studies: When True, no filtering is done and ``dataframes`` is
            returned unchanged.

    Returns:
        list: The filtered DataFrames, in the same order as the input.
    """
    if not all_studies:
        return [
            frame[frame['study_id'].isin(study_ids_to_keep)]
            for frame in dataframes
        ]
    return dataframes

 
import pandas as pd

# Toy table: four rows, two of which belong to study 4003253
data = dict(
    study_id=[4003253, 4003254, 4003253, 4003255],
    patient_name=['Alice', 'Bob', 'Charlie', 'David'],
)

df_image_paths = pd.DataFrame(data)

# Keep only the rows of the selected study
study_ids_to_keep = [4003253]
dataframes = [df_image_paths]

filtered_dataframes = keep_persons(
    dataframes,
    study_ids_to_keep,
    all_studies=False,
)

# Show each filtered table
for filtered_df in filtered_dataframes:
    print(filtered_df)


In [None]:
import pandas as pd
import numpy as np

# Hand-written five-row sample table: four rows for patient 10006 followed by
# one row for patient 10011.
df_train = pd.DataFrame({
    "site_id": [2] * 5,
    "patient_id": [10006] * 4 + [10011],
    "image_id": [462822612, 1459541791, 1864590858, 1874946579, 220375232],
    "laterality": list("LLRRL"),
    "view": ["CC", "MLO", "MLO", "CC", "CC"],
    "age": [61.0] * 4 + [55.0],
    "cancer": [0] * 5,
    "biopsy": [0] * 5,
    "invasive": [0] * 5,
    "BIRADS": [np.nan] * 4 + [0.0],
    "implant": [0] * 5,
    "density": [np.nan] * 5,
    "machine_id": [29] * 4 + [21],
    "difficult_negative_case": [False] * 4 + [True],
})

df_train

In [None]:
# Build the one-column 'data' frame of class labels: one 'Total' per row of
# df_train, one 'Malignant Cancer' per row with cancer == 1 and one
# 'Invasive Cancer' per row with cancer == 1 and invasive == 1.
_is_malignant = df_train['cancer'] == 1
_is_invasive = _is_malignant & (df_train['invasive'] == 1)

data = pd.DataFrame(
    ['Total'] * len(df_train)
    + ['Malignant Cancer'] * int(_is_malignant.sum())
    + ['Invasive Cancer'] * int(_is_invasive.sum()),
    columns=["class"],
)

print(data)

In [None]:
# Keep only the rows that had a biopsy (biopsy == 1) and renumber them from 0.
_had_biopsy = df_train['biopsy'].eq(1)
DF_train = df_train.loc[_had_biopsy].reset_index(drop=True)
DF_train.head()