Necessary packages:

In [None]:
import glob
import os
import warnings


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import pydicom
from pydicom.data import get_testdata_files
from sklearn.model_selection import train_test_split

from IPython.display import Markdown, display

 # Import functions from the module
import importlib
import help_files._0_definitions 
import  help_files._1_visuals_script
# import  help_files._01_load_data
 # Reload the module to apply the changes to the script
importlib.reload(help_files._0_definitions)
importlib.reload(help_files._1_visuals_script)
# importlib.reload(help_files._01_load_data)
import  help_files._1_visuals_script  as pauls_vs

# Define the path
from pathlib import Path

 

In [None]:
# Execute help_files/_0_definitions.py inside the notebook namespace, so every name
# that file defines at module level becomes a notebook global.
# NOTE(review): later cells use names that are never assigned in this notebook
# (data_path_vor, RSEED, full_sample) — presumably they come from this file; confirm.
# NOTE(review): exec() runs the file contents unchecked. The same module is already
# imported above as help_files._0_definitions, so an explicit import may be enough.
with open("help_files/_0_definitions.py") as file:
    exec(file.read())

In [None]:
# Read the three CSV exports located in data_path_vor, in a fixed order
file_names = ["X_train.csv", "X_train_coor.csv", "X_train_des.csv"]
dataframes = []
for file_name in file_names:
    dataframes.append(pd.read_csv(data_path_vor / file_name))

# Unpack in the same order as file_names
X_train, X_train_coor, X_train_des = dataframes

### X_train manipulation

In [None]:
X_train

In [None]:
# Summary statistics for every column (numeric and non-numeric alike)
summary = X_train.describe(include='all')
print(summary)

# DataFrame.info() writes its report to stdout and returns None, so it is called
# for its side effect only; printing its return value would just emit "None".
X_train.info()

In [None]:
X_train.dtypes

In [None]:
# Target columns: one per (condition, spinal level) pair
columns_to_iterate = [
    'spinal_canal_stenosis_l1_l2', 'spinal_canal_stenosis_l2_l3', 'spinal_canal_stenosis_l3_l4',
    'spinal_canal_stenosis_l4_l5', 'spinal_canal_stenosis_l5_s1', 'left_neural_foraminal_narrowing_l1_l2',
    'left_neural_foraminal_narrowing_l2_l3', 'left_neural_foraminal_narrowing_l3_l4', 'left_neural_foraminal_narrowing_l4_l5',
    'left_neural_foraminal_narrowing_l5_s1', 'right_neural_foraminal_narrowing_l1_l2', 'right_neural_foraminal_narrowing_l2_l3',
    'right_neural_foraminal_narrowing_l3_l4', 'right_neural_foraminal_narrowing_l4_l5', 'right_neural_foraminal_narrowing_l5_s1',
    'left_subarticular_stenosis_l1_l2', 'left_subarticular_stenosis_l2_l3', 'left_subarticular_stenosis_l3_l4',
    'left_subarticular_stenosis_l4_l5', 'left_subarticular_stenosis_l5_s1', 'right_subarticular_stenosis_l1_l2',
    'right_subarticular_stenosis_l2_l3', 'right_subarticular_stenosis_l3_l4', 'right_subarticular_stenosis_l4_l5',
    'right_subarticular_stenosis_l5_s1'
]

# Encode the textual severity grades as ordered integers; values that are not one
# of the three grades (including missing values) become NaN.
severity_codes = {'Normal/Mild': 1, 'Moderate': 2, 'Severe': 3}
for column in columns_to_iterate:
    # Skip columns that are already numeric, so that re-running this cell does not
    # map the previously encoded numbers to NaN.
    if not pd.api.types.is_numeric_dtype(X_train[column]):
        X_train[column] = X_train[column].map(severity_codes)

X_train

In [None]:
### Distribution of the target variables 

In [None]:
# Build one record per target column: absolute counts first, percentage shares after
severity_labels = {1.0: 'Normal/Mild', 2.0: 'Moderate', 3.0: 'Severe'}
distribution_data = []

for column in columns_to_iterate:
    counts = X_train[column].value_counts().sort_index()
    total_counts = counts.sum()

    record = {'Condition': column}
    for code, label in severity_labels.items():
        record[label] = counts.get(code, 0)
    for code, label in severity_labels.items():
        record[f'{label} (%)'] = (counts.get(code, 0) / total_counts) * 100
    distribution_data.append(record)

# Tabulate and show the distribution
distribution_df = pd.DataFrame(distribution_data)
print(distribution_df)

# Descriptive statistics restricted to the target columns
summary_stats = X_train[columns_to_iterate].describe(include='all')
print(summary_stats)


In [None]:
X_train_des

In [None]:
## Reshape the data from wide to long: one row per (study_id, condition_level)
# Names of the 25 target columns, built as <condition>_<level>
spine_levels = ['l1_l2', 'l2_l3', 'l3_l4', 'l4_l5', 'l5_s1']
condition_names = [
    'spinal_canal_stenosis',
    'left_neural_foraminal_narrowing',
    'right_neural_foraminal_narrowing',
    'left_subarticular_stenosis',
    'right_subarticular_stenosis',
]
reshape_list = [f'{condition}_{level}' for condition in condition_names for level in spine_levels]

# reshape_list is passed as value_vars so only the target columns are melted.
# Without it every non-id column is melted, and re-running the cell on the already
# reshaped frame would silently melt 'categorie'/'severity'; now it raises instead.
reshaped_train = pd.melt(
    X_train,
    id_vars=["study_id"],       # keep study_id as is
    value_vars=reshape_list,    # the columns that become rows
    var_name="categorie",       # new column holding the former column names
    value_name="severity",      # new column holding the values
)

# Optional export (ensure the file is not open elsewhere and is writable):
# reshaped_train.to_csv(path / "starfor_sorting_new.csv",  index=False)

X_train = reshaped_train.sort_values(by=['study_id', 'categorie'], ignore_index=True)
X_train.head()

In [None]:
# Cut each 'categorie' value at its last two underscores,
# e.g. 'spinal_canal_stenosis_l1_l2' -> ['spinal_canal_stenosis', 'l1', 'l2']
split_columns = X_train['categorie'].str.rsplit('_', n=2)

# First piece is the condition; the last two pieces are re-joined into the level
level_from = split_columns.str[1]
level_to = split_columns.str[2]
X_train['condition'] = split_columns.str[0]
X_train['level'] = level_from + '_' + level_to

X_train

In [None]:
## Harmonise the join keys of both frames
# X_train: underscores become ' ' in condition and '/' in level
for key_column, separator in (('condition', ' '), ('level', '/')):
    X_train[key_column] = X_train[key_column].str.replace('_', separator)

# X_train_coor: lower-case the same two columns
for key_column in ('condition', 'level'):
    X_train_coor[key_column] = X_train_coor[key_column].str.lower()

X_train.head()

### Select which kind of damage

In [None]:

# Keep only the rows whose condition contains 'left neural foraminal narrowing'
# and whose level is exactly 'l5/s1'.
filtered_df = X_train[(X_train['condition'].str.contains('left neural foraminal narrowing')) & (X_train['level'] == 'l5/s1')]
print(filtered_df)

X_train = filtered_df


In [None]:
X_train 

In [None]:
# Number of distinct study_ids in each frame
unique_study_ids_train = X_train['study_id'].nunique()
unique_study_ids_train_coor = X_train_coor['study_id'].nunique()

for frame_name, id_count in (
    ("X_train", unique_study_ids_train),
    ("X_train_coor", unique_study_ids_train_coor),
):
    print(f"Number of unique study_id in {frame_name}: {id_count}")

In [None]:
 # get rid of persons with no coordinates
has_coordinates = X_train['study_id'].isin(X_train_coor['study_id'])  # True where the study also appears in X_train_coor
X_train = X_train[has_coordinates]

In [None]:
# Re-count the distinct study_ids in each frame
unique_study_ids_train = X_train['study_id'].nunique()
unique_study_ids_train_coor = X_train_coor['study_id'].nunique()

id_counts = {"X_train": unique_study_ids_train, "X_train_coor": unique_study_ids_train_coor}
for frame_name in ("X_train", "X_train_coor"):
    print(f"Number of unique study_id in {frame_name}: {id_counts[frame_name]}")

In [None]:
# Left join: keep every row of X_train and attach the X_train_coor rows that match
# on study_id, condition and level (unmatched rows get NaN in the X_train_coor columns).
merged_df = pd.merge(X_train, X_train_coor, on=['study_id', 'condition', 'level'], how='left')
# Display the first rows of the merged DataFrame
merged_df.head()

### some statistics of data set

In [None]:
# Row counts of both inputs and of the join result
print(f"Rows in X_train: {len(X_train)}")
print(f"Rows in X_train_coor: {len(X_train_coor)}")
print(f"Rows in merged_df: {len(merged_df)}")

# Columns contributed by X_train_coor, i.e. everything except the join keys
columns_from_X_train_coor = [
    col for col in X_train_coor.columns
    if col not in ['study_id', 'condition', 'level']
]

# Null mask over the X_train_coor columns, computed once and reused below
coor_is_null = merged_df[columns_from_X_train_coor].isnull()

# Rows where all of those columns are empty
missing_rows = coor_is_null.all(axis=1).sum()

# All rows of every study that has at least one row with a missing X_train_coor value
missing_study_ids = merged_df[coor_is_null.any(axis=1)]['study_id'].unique()
missing_persons_df = (
    merged_df[merged_df['study_id'].isin(missing_study_ids)]
    .sort_values(by=['study_id', 'categorie'], ignore_index=True)
)

print(f"Rows in merged_df without matching rows in X_train_coor: {missing_rows}")
print(f"Rows in merged_df with matches from X_train_coor: {len(merged_df) - missing_rows}")

In [None]:
missing_persons_df.head()

In [None]:
# Discard every row that lacks a value in at least one X_train_coor column
merged_df = merged_df.dropna(subset=columns_from_X_train_coor, how='any')

# Reminder rendered in red in the notebook output
reminder_html = '<span style="color:red">later on to take it back to the original shape : 48692</span>'
display(Markdown(reminder_html))

In [None]:
merged_df

### Folder with images: preparing paths for merging with main data 

* Define which MRI series type is adequate for my analysis

In [None]:
X_train_des.dtypes

In [None]:
# Keep only the series described as 'Sagittal T2/STIR'
is_sagittal_t2 = X_train_des['series_description'] == 'Sagittal T2/STIR'
X_train_des = X_train_des[is_sagittal_t2]
X_train_des.head()

### Paths to images: two options how to do

In [None]:
# using X_train_desÖ Ecxell table is better than taking the path from the folders 
import re

def paths_to_images(df, data_dir):
    """Collect the paths of all files stored under ``data_dir/<study_id>/<series_id>``.

    Args:
        df: Mapping-like object (e.g. a DataFrame) whose ``'study_id'`` and
            ``'series_id'`` entries are iterated pairwise.
        data_dir: Root directory that contains one sub-directory per study.

    Returns:
        list[str]: One path per file found, grouped by the row order of ``df`` and
        sorted by file name within each series directory.

    Notes:
        A series directory that does not exist is skipped with a warning instead of
        aborting the whole scan with ``FileNotFoundError``.
    """
    image_paths = []
    for study_id, series_id in zip(df['study_id'], df['series_id']):
        series_dir = os.path.join(data_dir, str(study_id), str(series_id))

        if not os.path.isdir(series_dir):
            warnings.warn(f"Series directory not found, skipped: {series_dir}")
            continue

        # os.listdir returns entries in arbitrary order; sort for reproducible output
        for img in sorted(os.listdir(series_dir)):
            image_paths.append(os.path.join(series_dir, img))

    return image_paths

image_paths = paths_to_images(X_train_des, "data/train_images_origin")  # image root, relative to the working directory

# Sort the image paths to ensure numerical order
def numerical_sort(value):
    """Return a natural-sort key for ``value``.

    The string is cut into alternating non-digit and digit runs; digit runs are
    converted to ``int`` so that e.g. '2' orders before '10'.
    """
    key = []
    for chunk in re.split(r'(\d+)', value):
        if chunk.isdigit():
            key.append(int(chunk))
        else:
            key.append(chunk)
    return key

# Natural ordering, so numbered files sort as 1, 2, ..., 10 rather than 1, 10, 2
image_paths = sorted(image_paths, key=lambda path: numerical_sort(path))

# One row per image path
df_image_paths = pd.DataFrame(image_paths, columns=['image_path'])
df_image_paths.head()

In [None]:
""" # brauche ich nicht Using real folders and images:  
# path to images folder and images inside the folder
import re
# Define the main directory
main_dir = "data/train_images"

def paths_to_images_2(main_dir):
    image_paths = []
    # Walk through the directory structure starting from the main directory
    for root, subdirs, files in os.walk(main_dir):
        for file in files:
            # Create the full path for each image
            print(file)
            file_path = os.path.join(root, file)
            image_paths.append(file_path)

    return image_paths

paths_to_images_2(main_dir)



################   sort path images in numerical order  
# Define the main directory
main_dir = "data/train_images"

# Get the image paths
image_paths2 = paths_to_images_2(main_dir)

# Sort the image paths to ensure numerical order
def numerical_sort(value):
    parts = re.split(r'(\d+)', value)
    return [int(part) if part.isdigit() else part for part in parts]

image_paths2 = sorted(image_paths2, key=numerical_sort)
image_paths2[:75]

df_image_paths2 = pd.DataFrame(image_paths2, columns=['image_path'])
df_image_paths2.head()
 
"""

In [None]:
# Convert Windows separators to forward slashes. regex=False makes the backslash a
# literal on every pandas version; a lone backslash is not a valid regular expression,
# so the result must not depend on the version-specific default of `regex`.
df_image_paths['image_path'] = df_image_paths['image_path'].str.replace('\\', '/', regex=False)
df_image_paths.head()

In [None]:
# Split the path and extract the study_id: make integer type of study_id
def safe_int_conversion(x):
    """Convert ``x`` to ``int``, returning ``None`` when it cannot be converted.

    Args:
        x: Value to convert (typically a path component string).

    Returns:
        int | None: ``int(x)``, or ``None`` if ``int`` raises ``ValueError``
        (non-numeric string, NaN), ``TypeError`` (e.g. ``None``) or
        ``OverflowError`` (infinite float).
    """
    try:
        return int(x)
    except (ValueError, TypeError, OverflowError):
        # int() on a non-numeric string raises ValueError, not OverflowError;
        # catching only OverflowError let malformed path components crash the cell.
        return None

# Take the third '/'-separated component of each path as the study_id. This matches
# paths of the form data/train_images_origin/<study_id>/<series_id>/<file> built above.
df_image_paths['study_id'] = df_image_paths['image_path'].apply(lambda x: safe_int_conversion(x.split('/')[2]))
# NOTE(review): astype(int) cannot convert None, so a failed conversion in the line
# above would surface here as an error — confirm that is the intended behaviour.
df_image_paths['study_id'] = df_image_paths['study_id'].astype(int)
df_image_paths.head(75)

In [None]:
df_image_paths.dtypes

In [None]:
# Keep only selected persons.
# NOTE(review): keep_persons, study_ids_to_keep and all_persons are defined in
# help_files/_0_definitions.py and are not visible here — presumably the frame is
# restricted to study_ids_to_keep unless all_persons is set; verify in that module.
from help_files._0_definitions import keep_persons, study_ids_to_keep, all_persons
voraluf = keep_persons(df_image_paths, study_ids_to_keep, all_persons)
# Continue under the original name with the filtered result
df_image_paths = voraluf
df_image_paths

#### Join  X_train and df_image_paths

In [None]:
""" some checks 
df_image_paths['count'] = df_image_paths.groupby('study_id').cumcount() + 1
df_image_paths['count_per_person'] = df_image_paths.groupby('study_id')['study_id'].transform('count')
df_image_paths['mean_per_of_raws'] = df_image_paths.groupby('study_id').first()['count_per_person'].mean()
df_image_paths['first_row_flag'] = df_image_paths.groupby('study_id').cumcount().apply(lambda x: 1 if x == 0 else 0)
df_image_paths['mean_per_of_raws2'] = np.where(df_image_paths['first_row_flag'] == 1, df_image_paths['count_per_person'].mean(), np.nan)
df_image_paths.head(75)

print("at average there are 81 images per person")
"""

### Merging: one-to-many left join of df_image_paths onto merged_df

In [None]:
# Left join of merged_df with df_image_paths on study_id, followed by removal of
# the two columns that are not used downstream
df_end = (
    merged_df
    .merge(df_image_paths, on='study_id', how='left')
    .drop(columns=['categorie', 'instance_number'])
)

df_end.head()

In [None]:
# Remove two further columns that are no longer needed
unused_columns = ['level', 'series_id']
df_end = df_end.drop(columns=unused_columns)
df_end.head()

In [None]:
before = df_end.shape[0]

# Show the rows that have no image path BEFORE removing them; printing after the
# dropna (as was done previously) always yields an empty frame.
print(df_end[df_end['image_path'].isna()])

# Option 1: Drop rows with NaN values in image_path
df_end = df_end.dropna(subset=['image_path'])

after = df_end.shape[0]
print(f"Number of rows dropped: {before - after}")

In [None]:
# Cast image_path to str so every entry is a plain string path.
# NOTE(review): df_end comes from dropna() in the previous cell; if pandas emits a
# SettingWithCopyWarning here, assign via a copy — confirm.
df_end['image_path'] = df_end['image_path'].astype(str)
# Print the first image_path values for inspection
print(df_end['image_path'].head())

In [None]:
df_end.dtypes

In [None]:
# generate_sample is defined in help_files/_0_definitions.py (not visible here).
from help_files._0_definitions import generate_sample
# NOTE(review): full_sample is not assigned in this notebook — presumably a global
# created by exec'ing _0_definitions.py that controls generate_sample; confirm.
print(full_sample)

# Replace df_end with whatever subset generate_sample returns
df_end = generate_sample(df_end)

### Run the code below only once: it transforms the images and saves them (see _02_transform_images). It transforms only the 'Sagittal T2/STIR' pictures; for the whole sample this took just 7 minutes.

In [None]:
# transform images: already predtransforemed
"""
with open("help_files/_02_transform_images.py") as file:
    exec(file.read())
""" 
 

### Randomly select one row for each study_id

In [None]:
# Draw exactly one row per study_id; RSEED fixes the random draw
rows_by_study = df_end.groupby('study_id')
random_sample_df = rows_by_study.sample(n=1, random_state=RSEED)

random_sample_df

### Randomly select `number_persons_train` persons for each severity level (1, 2, 3) from random_sample_df

In [None]:
# Number of rows to draw per severity level in the sampling cell below
number_persons_train = 100

In [None]:
# Function to get random samples for each severity level
def get_random_samples(df, severity, number, random_state=RSEED):
    """Draw ``number`` random rows of ``df`` whose ``severity`` equals the given grade.

    Args:
        df: DataFrame with a ``severity`` column.
        severity: Severity value to select.
        number: How many rows to draw.
        random_state: Seed forwarded to ``DataFrame.sample`` (defaults to RSEED).

    Returns:
        DataFrame with ``number`` sampled rows.

    Raises:
        ValueError: Raised by ``DataFrame.sample`` when fewer than ``number``
            matching rows exist (sampling is without replacement).
    """
    matching_rows = df[df['severity'] == severity]
    # Honour the ``number`` argument; it was previously ignored in favour of the
    # global number_persons_train.
    return matching_rows.sample(n=number, random_state=random_state)

# Draw the same number of rows for each of the three severity levels
random_sample_severity_1, random_sample_severity_2, random_sample_severity_3 = (
    get_random_samples(random_sample_df, severity=grade, number=number_persons_train)
    for grade in (1.0, 2.0, 3.0)
)

# Stack the three samples, order them by study_id and renumber the index
random_samples_combined = (
    pd.concat([random_sample_severity_1, random_sample_severity_2, random_sample_severity_3])
    .sort_values(by='study_id')
    .reset_index(drop=True)
)
random_samples_combined.head()



In [None]:
# df_end now refers to the same object as random_samples_combined (no copy is made),
# so later column assignments on df_end also change random_samples_combined.
df_end = random_samples_combined
# Configure pandas display options
pd.set_option("display.width", 1000)  # Set a large width to prevent line wrapping

# Print the complete frame as plain text, without the index column
print(df_end.to_string(index=False, header=True))

In [None]:
df_end.dtypes
# Replace the condition strings by integer category codes.
# NOTE(review): df_end was bound to random_samples_combined without a copy in the
# previous cell, so this assignment also changes random_samples_combined.
df_end['condition'] = df_end['condition'].astype('category').cat.codes

## first try 

In [None]:
# Re-bind df_end to random_samples_combined. No copy is made, so changes already
# applied through df_end are still present in this frame.
df_end = random_samples_combined

# Print the complete frame as plain text, without the index column
print(df_end.to_string(index=False, header=True))

In [None]:
 
# Shift the severity labels from 1..3 to 0..2, the zero-based class indices that
# nn.CrossEntropyLoss expects. The guard keeps the cell idempotent: once shifted, the
# smallest label is 0, so running the cell again does not produce a label of -1.
if df_end['severity'].min() >= 1:
    df_end['severity'] = df_end['severity'] - 1

### with crossvalidation

In [None]:
import os
import pydicom
import torch
from torch.utils.data import Dataset
import cv2

class TransformedMRIDataset(Dataset):
    """Dataset that loads DICOM files located below ``output_folder``.

    Each item is ``(image_tensor, label)``. ``image_tensor`` is the pixel array as a
    channel-first float tensor divided by 255; ``label`` is the row's ``severity``
    as a long tensor.
    """

    def __init__(self, data, output_folder):
        self.data = data
        # Stored as an absolute path
        self.output_folder = os.path.abspath(output_folder)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        record = self.data.iloc[index]
        source_path = os.path.abspath(record['image_path'])

        # Express the image path relative to the deepest directory it shares with
        # output_folder, then re-root that relative path under output_folder.
        shared_root = os.path.commonpath([self.output_folder, source_path])
        transformed_image_path = os.path.join(
            self.output_folder,
            os.path.relpath(source_path, shared_root),
        )

        dicom_image = pydicom.dcmread(transformed_image_path)
        if not hasattr(dicom_image, 'PixelData'):
            raise ValueError(f"No pixel data found for image {transformed_image_path}")

        # 2-D (grayscale) arrays are expanded to three channels; others pass through
        pixels = dicom_image.pixel_array
        if len(pixels.shape) == 2:
            image_rgb = cv2.cvtColor(pixels, cv2.COLOR_GRAY2RGB)
        else:
            image_rgb = pixels

        # Channel-first float tensor divided by 255
        # (lands in [0, 1] only for 8-bit pixel data — TODO confirm the stored depth)
        image_tensor = torch.tensor(image_rgb).permute(2, 0, 1).float() / 255.0
        label = torch.tensor(record['severity']).long()
        return image_tensor, label


In [None]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
import torchvision.models as models

# Root folder containing the transformed images in <study_id>/<series_id> sub-folders.
# Given relative to the working directory (the same 'data/train_images_origin' root
# used when the image paths were collected) instead of an absolute path on one
# user's machine; TransformedMRIDataset converts it to an absolute path itself.
# NOTE(review): assumes the notebook runs from the project root — confirm.
transformed_folder = os.path.join('data', 'train_images_origin')

# Create the dataset and dataloader
dataset = TransformedMRIDataset(data=df_end, output_folder=transformed_folder)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Set up ResNet-50 model and replace the final layer by one with num_classes outputs
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = df_end['severity'].nunique()

model = models.resnet50(weights='IMAGENET1K_V1')
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
num_epochs = 10

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses of this epoch
    epoch_loss = running_loss / len(dataloader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

print("Training complete!")


In [None]:
s  # NOTE(review): `s` is not defined in this notebook, so this cell raises NameError and halts "Run All" here — presumably a deliberate stop marker; confirm or remove

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.models as models
from torchvision import transforms
import pydicom
import cv2
import pandas as pd

# Define the transform with augmentation: I already tranformed i tbfore 
"""
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(10),       # Randomly rotate the image by ±10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust color properties
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
"""

class MRIDataset(Dataset):
    """Dataset that reads one DICOM file per DataFrame row and returns ``(image, label)``.

    Args:
        data: DataFrame with an ``image_path`` column (path of a DICOM file) and a
            ``severity`` column that can be cast to ``int``.
        transform: Optional callable applied to the ``uint8`` image array. When it
            is omitted, the array is converted to a channel-first float tensor
            scaled to [0, 1].
    """

    def __init__(self, data, transform=None):
        # Work on a private copy: the caller's DataFrame is left untouched and no
        # SettingWithCopyWarning is raised when ``data`` is a slice of another frame.
        self.data = data.copy()
        self.transform = transform

        # Ensure severity is in integer format
        self.data['severity'] = self.data['severity'].astype(int)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        image_path = row['image_path']
        label = int(row['severity'])  # severity is the class label

        dicom_image = pydicom.dcmread(image_path)
        image = dicom_image.pixel_array.astype(float)

        # Rescale so the brightest pixel becomes 255. An all-zero image is left as
        # is, because dividing by a maximum of 0 would fill the array with NaN.
        peak = image.max()
        if peak > 0:
            image = image / peak * 255
        image = image.astype('uint8')

        # Expand 2-D (grayscale) arrays to three channels
        if len(image.shape) == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        if self.transform:
            image_tensor = self.transform(image)
        else:
            # Without a transform, return a float tensor in [0, 1]; a raw uint8
            # tensor cannot be fed to the float-weighted model.
            image_tensor = torch.from_numpy(image).permute(2, 0, 1).float() / 255.0

        return image_tensor, torch.tensor(label).long()

    def __len__(self):
        return len(self.data)

# Create the MRIDataset from the original DataFrame
# NOTE(review): `transform` is not assigned above this cell (the Compose block in
# this cell is disabled inside a string literal), so on a fresh kernel this line
# raises NameError unless a later cell defining `transform` was run first — confirm
# which transform is intended.
dataset = MRIDataset(data=df_end, transform=transform)

# Split the dataset into training and validation sets (80% / 20%).
# The generator is seeded with RSEED so the split is the same on every run.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(RSEED),
)

# Create DataLoaders for training and validation
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

# Load ResNet-50 and set up for classification
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = df_end['severity'].nunique()  # Number of unique severities

model = models.resnet50(weights='IMAGENET1K_V1')
model.fc = nn.Linear(model.fc.in_features, num_classes)  # One output per class
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10

# Training loop with validation
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()  # Zero the parameter gradients
        outputs = model(images)  # Forward pass
        loss = criterion(outputs, labels)  # Calculate loss
        loss.backward()  # Backward pass
        optimizer.step()  # Optimize the weights

        running_loss += loss.item()  # Accumulate loss

    # Mean training loss per batch. It is computed once, from train_loader; the
    # earlier second computation divided by len(dataloader), a loader that belongs
    # to a different cell, and printed a misleading duplicate "Loss" line.
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {epoch_loss:.4f}')

    # Validation phase
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():  # Disable gradient calculation
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)  # Forward pass
            loss = criterion(outputs, labels)  # Calculate loss

            val_loss += loss.item()  # Accumulate validation loss
            _, predicted = torch.max(outputs.data, 1)  # Class with the highest score
            total += labels.size(0)  # Total number of labels
            correct += (predicted == labels).sum().item()  # Count correct predictions

    val_loss /= len(val_loader)  # Mean validation loss per batch
    accuracy = 100 * correct / total
    print(f'Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.2f}%')

print("Training complete!")


In [None]:
s  # NOTE(review): `s` is not defined in this notebook, so this cell raises NameError and halts "Run All" here — presumably a deliberate stop marker; confirm or remove

### without crossvalidation

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from torchvision import transforms
import pydicom
import cv2
import pandas as pd

# Preprocessing and augmentation pipeline applied to each image array by MRIDataset.
# The random steps (flip, rotation, colour jitter) differ on every call.
transform = transforms.Compose([
    transforms.ToPILImage(),  # array -> PIL image, required by the steps below
    transforms.Resize((224, 224)),  # fixed 224x224 size
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(10),       # Randomly rotate the image by ±10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust color properties
    transforms.ToTensor(),  # PIL image -> float tensor
    # Per-channel normalisation; presumably the ImageNet statistics that match the
    # pretrained ResNet weights — TODO confirm
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class MRIDataset(Dataset):
    """Dataset that reads one DICOM file per DataFrame row and returns ``(image, label)``.

    Args:
        data: DataFrame with an ``image_path`` column (path of a DICOM file) and a
            ``severity`` column that can be cast to ``int``.
        transform: Optional callable applied to the ``uint8`` image array. When it
            is omitted, the array is converted to a channel-first float tensor
            scaled to [0, 1].
    """

    def __init__(self, data, transform=None):
        # Work on a private copy: the caller's DataFrame is left untouched and no
        # SettingWithCopyWarning is raised when ``data`` is a slice of another frame.
        self.data = data.copy()
        self.transform = transform

        # Ensure severity is in integer format
        self.data['severity'] = self.data['severity'].astype(int)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        image_path = row['image_path']
        # The x / y columns were read here before but never used, so the lookup
        # was removed.
        label = int(row['severity'])  # severity is the class label

        dicom_image = pydicom.dcmread(image_path)
        image = dicom_image.pixel_array.astype(float)

        # Rescale so the brightest pixel becomes 255. An all-zero image is left as
        # is, because dividing by a maximum of 0 would fill the array with NaN.
        peak = image.max()
        if peak > 0:
            image = image / peak * 255
        image = image.astype('uint8')

        # Expand 2-D (grayscale) arrays to three channels
        if len(image.shape) == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        if self.transform:
            image_tensor = self.transform(image)
        else:
            # Without a transform, return a float tensor in [0, 1]; a raw uint8
            # tensor cannot be fed to the float-weighted model.
            image_tensor = torch.from_numpy(image).permute(2, 0, 1).float() / 255.0

        return image_tensor, torch.tensor(label).long()

    def __len__(self):
        return len(self.data)

# Dataset over df_end with the augmentation pipeline; batches of 4 in random order
dataset = MRIDataset(data=df_end, transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)  # Adjust batch size as needed

# Run on the GPU when one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_classes = df_end['severity'].nunique()  # one output unit per distinct severity

# Pretrained ResNet-50 whose final layer is swapped for a num_classes-way classifier
model = models.resnet50(weights='IMAGENET1K_V1')
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.to(device)

# Optimisation setup
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
num_epochs = 10

# Training loop
for epoch in range(num_epochs):
    model.train()  # training mode
    batch_losses = []

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()              # clear gradients of the previous step
        loss = criterion(model(images), labels)
        loss.backward()                    # back-propagate
        optimizer.step()                   # update the weights

        batch_losses.append(loss.item())

    # Mean of the per-batch losses of this epoch
    running_loss = sum(batch_losses)
    epoch_loss = running_loss / len(dataloader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

print("Training complete!")
