Packages:

In [1]:
import glob
import os
import warnings


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import pydicom
from pydicom.data import get_testdata_files
from sklearn.model_selection import train_test_split

from IPython.display import Markdown, display

 # Import functions from the module
import importlib
import help_files._0_definitions 
import  help_files._1_visuals_script
# import  help_files._01_load_data
 # Reload the module to apply the changes to the script
importlib.reload(help_files._0_definitions)
importlib.reload(help_files._1_visuals_script)
# importlib.reload(help_files._01_load_data)
import  help_files._1_visuals_script  as pauls_vs
# Group by 'condition', 'level', and 'severity' and count occurrences
from help_files._0_definitions import count_severity_by_condition_level 
# Define the path
from pathlib import Path

pd.set_option("display.width", 1000)  # Set a large width to prevent line wrapping
 

In [31]:
### In definitions are all the functions that are used in the notebook and globals
with open("help_files/_0_definitions.py") as file:
    exec(file.read())

In [32]:
# Save the filtered dataframes to CSV files
dataframes = ["df_end"]
file_names = ["df_end.csv"]
 
# Load the data from _01_load_data
dataframes = [pd.read_csv(data_path_vor / file_name) for file_name in file_names]
df_end = dataframes[0]


In [33]:
df_end.dtypes

study_id        int64
severity      float64
condition       int64
level          object
series_id     float64
x             float64
y             float64
image_path     object
dtype: object

In [34]:
# Randomly sample 100 rows from random_samples_combined
random_samples_test_check = df_end.sample(n=100, random_state=RSEED)
random_samples_test_check


Unnamed: 0,study_id,severity,condition,level,series_id,x,y,image_path
1801,1261271580,2.0,0,l5/s1,5.634637e+08,154.576328,221.944761,data/train_images_origin/1261271580/813965073/...
1190,808539750,2.0,0,l5/s1,3.152297e+09,464.810632,624.751228,data/train_images_origin/808539750/412413083/4...
1817,1271033295,2.0,0,l5/s1,1.040397e+09,144.600457,244.992390,data/train_images_origin/1271033295/2054979604...
251,178041181,1.0,0,l5/s1,3.904103e+09,352.485414,427.421214,data/train_images_origin/178041181/2495441739/...
2505,1746166687,1.0,0,l5/s1,1.482487e+07,264.814679,368.499083,data/train_images_origin/1746166687/2015196917...
...,...,...,...,...,...,...,...,...
416,296314829,1.0,0,l5/s1,3.788461e+09,138.819320,268.622540,data/train_images_origin/296314829/3463650248/...
2631,1859534255,0.0,0,l5/s1,3.931601e+09,238.748092,307.786260,data/train_images_origin/1859534255/1445343658...
2291,1588228644,0.0,0,l5/s1,8.639016e+08,138.507463,198.339213,data/train_images_origin/1588228644/1586605256...
174,109677683,2.0,0,l5/s1,1.241325e+09,308.612907,567.957571,data/train_images_origin/109677683/714837857/6...


Restore parameters and metrtics from mlflow

I should take prdicted probabilities and not predicted classes: output scores (logits) are converted into probabilities using the softmax function

In [35]:
import mlflow
import mlflow.pytorch
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.models as models
from torchvision import transforms
import pydicom
import cv2
import pandas as pd
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# Set the experiment name
experiment_name = "Resnet50_MRI_Classification"
mlflow.set_experiment(experiment_name)

# Specify the run ID of the logged model
run_id = "f390913c59d642329c86d0f52b943062"  # Replace with your actual run ID
model_uri = f"runs:/{run_id}/model"

# Load the model
model = mlflow.pytorch.load_model(model_uri)
model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))  # Move model to appropriate device
model.eval()  # Set the model to evaluation mode

# Prepare data
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

class MRIDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        self.data['severity'] = self.data['severity'].astype(int)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        image_path = row['image_path']
        label = row['severity']
        dicom_image = pydicom.dcmread(image_path)
        image = dicom_image.pixel_array.astype(float)
        image = (image / image.max() * 255).astype('uint8')
        if len(image.shape) == 2:  # Grayscale
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        image_tensor = self.transform(image) if self.transform else torch.from_numpy(image).permute(2, 0, 1)
        return image_tensor, torch.tensor(label).long()

    def __len__(self):
        return len(self.data)

predicting probabilities for test data

In [36]:
test_data = random_samples_test_check

test_dataset = MRIDataset(data=test_data, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Inference loop with probability extraction
results = []
probabilities_list = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)  # Ensure labels are also on the correct device
        outputs = model(images)
        probabilities = torch.softmax(outputs, dim=1)  # Calculate class probabilities
        _, predicted_classes = torch.max(outputs, 1)
        
        # Append predictions and probabilities
        results.append(predicted_classes.item())
        probabilities_list.append(probabilities.cpu().numpy())



Using device: cpu


In [37]:
# Print predicted classes and their probabilities
for i, (pred, probs) in enumerate(zip(results, probabilities_list)):
    print(f"Test image {i}: Predicted class {pred}, Probabilities: {probs}")

# Additional code to plot the confusion matrix can stay as-is:
true_labels = []
predicted_labels = []

for images, labels in test_loader:
    images = images.to(device)
    true_labels.extend(labels.numpy())

    with torch.no_grad():
        outputs = model(images)
        _, predicted_classes = torch.max(outputs, 1)
        predicted_labels.extend(predicted_classes.cpu().numpy())

true_labels = np.array(true_labels)
predicted_labels = np.array(predicted_labels)

print("True Labels Unique Values:", np.unique(true_labels))
print("Predicted Labels Unique Values:", np.unique(predicted_labels))


Test image 0: Predicted class 1, Probabilities: [[0.09760511 0.4604072  0.4419877 ]]
Test image 1: Predicted class 2, Probabilities: [[0.22035384 0.3208469  0.45879933]]
Test image 2: Predicted class 2, Probabilities: [[0.21413687 0.2634137  0.5224495 ]]
Test image 3: Predicted class 2, Probabilities: [[0.17551206 0.2931106  0.5313774 ]]
Test image 4: Predicted class 0, Probabilities: [[0.41742143 0.19516578 0.38741276]]
Test image 5: Predicted class 2, Probabilities: [[0.10754652 0.41178095 0.4806725 ]]
Test image 6: Predicted class 2, Probabilities: [[0.06741323 0.41644233 0.51614445]]
Test image 7: Predicted class 2, Probabilities: [[0.03414727 0.3045025  0.6613503 ]]
Test image 8: Predicted class 2, Probabilities: [[0.10371006 0.3035752  0.5927148 ]]
Test image 9: Predicted class 2, Probabilities: [[0.12966274 0.3148868  0.5554505 ]]
Test image 10: Predicted class 0, Probabilities: [[0.45072848 0.3199081  0.22936341]]
Test image 11: Predicted class 2, Probabilities: [[0.18368869 0.

In [38]:
from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np

# Generate the confusion matrix
conf_matrix = confusion_matrix(true_labels, predicted_labels)

# Create a DataFrame from the confusion matrix
conf_matrix_df = pd.DataFrame(
    conf_matrix, 
    index=[f"Actual {i}" for i in range(len(conf_matrix))], 
    columns=[f"Predicted {i}" for i in range(len(conf_matrix[0]))]
)

# Print the confusion matrix DataFrame
print(conf_matrix_df)

# Optionally, display it using a more formatted view (e.g., in Jupyter Notebook)
conf_matrix_df.style.background_gradient(cmap='Blues')


          Predicted 0  Predicted 1  Predicted 2
Actual 0            3            8           20
Actual 1            4            7           24
Actual 2            5            6           23


Unnamed: 0,Predicted 0,Predicted 1,Predicted 2
Actual 0,3,8,20
Actual 1,4,7,24
Actual 2,5,6,23


In [39]:
from sklearn.metrics import precision_score, recall_score, accuracy_score
# Calculate precision, recall, and accuracy using mean_probabilities_df
precision = precision_score(true_labels, predicted_labels, average='weighted')
recall = recall_score(true_labels, predicted_labels, average='weighted')
accuracy = accuracy_score(true_labels, predicted_labels)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Accuracy: {accuracy:.4f}")


Precision: 0.3109
Recall: 0.3300
Accuracy: 0.3300


In [40]:
ssss

NameError: name 'ssss' is not defined

These ones are predicted classes: 

In [None]:
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.models as models
from torchvision import transforms
import pydicom
import cv2
import pandas as pd
import numpy as np
import mlflow
import mlflow.pytorch
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Set the experiment name (optional, helps in identifying runs)
experiment_name = "Resnet50_MRI_Classification"
mlflow.set_experiment(experiment_name)

# Specify the run ID of the logged model
run_id = "f390913c59d642329c86d0f52b943062"  # Replace with your actual run ID

# Create the model URI
model_uri = f"runs:/{run_id}/model"

# Load the model
model = mlflow.pytorch.load_model(model_uri)

# Now you can use the model for inference or evaluation
model.eval()  # Set the model to evaluation mode

# Define the transform with augmentation: I already transformed it before 
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    #transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    #transforms.RandomRotation(10),       # Randomly rotate the image by ±10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust color properties
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Define the Dataset class
class MRIDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

        # Ensure severity is in integer format
        self.data['severity'] = self.data['severity'].astype(int)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        image_path = row['image_path']
        label = row['severity']  # Use severity for the label

        dicom_image = pydicom.dcmread(image_path)
        image = dicom_image.pixel_array.astype(float)
        image = (image / image.max() * 255).astype('uint8')  # Normalize

        # Convert the image to RGB if it is grayscale
        if len(image.shape) == 2:  # Grayscale
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        # Apply transformations including augmentation
        image_tensor = self.transform(image) if self.transform else torch.from_numpy(image).permute(2, 0, 1)

        return image_tensor, torch.tensor(label).long()  # Return label as tensor

    def __len__(self):
        return len(self.data)

# Sample the test DataFrame
test_df = df_end.sample(n=300, random_state=RSEED)  # Replace RSEED with your seed

# Create the test dataset and loader
test_dataset = MRIDataset(data=test_df, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# Check if a GPU is available and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Initialize lists to store true labels and predictions
true_labels = []
predicted_labels = []

# Collect true labels and predicted labels
for images, labels in test_loader:
    images = images.to(device)

    # Store the true labels
    true_labels.extend(labels.numpy())  # Convert to numpy and extend the list

    with torch.no_grad():
        outputs = model(images)
        _, predicted_classes = torch.max(outputs, 1)
        predicted_labels.extend(predicted_classes.cpu().numpy())  # Move to CPU and convert to list

# Convert to numpy arrays for confusion matrix
true_labels = np.array(true_labels)
predicted_labels = np.array(predicted_labels)

# Create a DataFrame with true and predicted values
results_df = pd.DataFrame({
    'True Severity': true_labels,
    'Predicted Severity': predicted_labels
})

# Display the DataFrame
print(results_df.head())  # Show the first few rows of the DataFrame

# Optionally, save the DataFrame to a CSV file for further analysis
results_df.to_csv('predictions_results.csv', index=False)

# Check the unique values in the true and predicted labels
print("True Labels Unique Values:", np.unique(true_labels))
print("Predicted Labels Unique Values:", np.unique(predicted_labels))

# Generate and display the confusion matrix
cm = confusion_matrix(true_labels, predicted_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1, 2])
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Extract true labels and predicted labels from results_df
true_labels = results_df['True Severity']
predicted_labels = results_df['Predicted Severity']

# Calculate the confusion matrix
cm = confusion_matrix(true_labels, predicted_labels)

# Display the confusion matrix
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1, 2])
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix from results_df")
plt.show()

In [None]:
test_df =df_end.sample(n=300, random_state=RSEED)

import mlflow
import mlflow.pytorch

# Set the experiment name (optional, helps in identifying runs)
experiment_name = "Resnet50_MRI_Classification"
mlflow.set_experiment(experiment_name)

# Specify the run ID of the logged model
run_id = "f390913c59d642329c86d0f52b943062"  # Replace with your actual run ID

# Create the model URI
model_uri = f"runs:/{run_id}/model"

# Load the model
model = mlflow.pytorch.load_model(model_uri)

# Now you can use the model for inference or evaluation
model.eval()  # Set the model to evaluation mode
# Get all runs for the experiment
runs = mlflow.search_runs(experiment_names=[experiment_name])

print(runs.filter(like='params.').to_string(index=False))
# Display all metrics
metrics_columns = [col for col in runs.columns if col.startswith('metrics.')]
print(runs[metrics_columns].to_string(index=False))

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.models as models
from torchvision import transforms
import pydicom
import cv2
import pandas as pd
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
 
# Define the transform with augmentation: I already tranformed i tbfore 

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    #transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    #transforms.RandomRotation(10),       # Randomly rotate the image by ±10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust color properties
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


class MRIDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

        # Ensure severity is in integer format
        self.data['severity'] = self.data['severity'].astype(int)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        image_path = row['image_path']
        label = row['severity']  # Use severity for the label

        dicom_image = pydicom.dcmread(image_path)
        image = dicom_image.pixel_array.astype(float)
        image = (image / image.max() * 255).astype('uint8')  # Normalize

        # Convert the image to RGB if it is grayscale
        if len(image.shape) == 2:  # Grayscale
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        # Apply transformations including augmentation
        image_tensor = self.transform(image) if self.transform else torch.from_numpy(image).permute(2, 0, 1)

        return image_tensor, torch.tensor(label).long()  # Return label as tensor

    def __len__(self):
        return len(self.data)

# Replace 'df_test' with your actual DataFrame containing test data
test_dataset = MRIDataset(data=test_df, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)  # Adjust batch size as needed
test_dataset
test_loader

run_id = "f390913c59d642329c86d0f52b943062"  # Replace with your actual run ID
model_uri = f"runs:/{run_id}/model"
model = mlflow.pytorch.load_model(model_uri)
model.eval()  # Set the model to evaluation mode

import torch

# Check if a GPU is available and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"Using device: {device}")
results = []
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = torch.tensor(labels)  # Convert labels to tensor
        outputs = model(images)
        _, predicted_classes = torch.max(outputs, 1)
        
        # Append predictions with corresponding image paths or indices
        results.append(predicted_classes.item())

# Print or save predictions as needed
for i, pred in enumerate(results):
    print(f"Test image {i}: Predicted class {pred}")



# Initialize lists to store true labels and predictions
true_labels = []
predicted_labels = []

# Collect true labels and predicted labels
for images, labels in test_loader:
    images = images.to(device)

    # Store the true labels
    true_labels.extend(labels.numpy())  # Convert to numpy and extend the list

    with torch.no_grad():
        outputs = model(images)
        _, predicted_classes = torch.max(outputs, 1)
        predicted_labels.extend(predicted_classes.cpu().numpy())  # Move to CPU and convert to list

# Convert to numpy arrays for confusion matrix
true_labels = np.array(true_labels)
predicted_labels = np.array(predicted_labels)

# Check the unique values in the true and predicted labels
print("True Labels Unique Values:", np.unique(true_labels))
print("Predicted Labels Unique Values:", np.unique(predicted_labels))


In [None]:
# Generate the confusion matrix, explicitly specifying labels
cm = confusion_matrix(true_labels, predicted_labels, labels=[0, 1, 2])

# Display the confusion matrix with fixed display labels
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1, 2])
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

 
# Calculate accuracy
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Accuracy: {accuracy:.4f}")

# Calculate precision
precision = precision_score(true_labels, predicted_labels, average='weighted')
print(f"Precision: {precision:.4f}")

# Calculate recall
recall = recall_score(true_labels, predicted_labels, average='weighted')
print(f"Recall: {recall:.4f}")