# Knee K-S Score Detection
This Notebook utilize the information of Knee 
### Author: Jiaqi Chen & Rongbin Ye 
### Date: 05/18/2025

In [2]:
## Importing libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import logging
import os
from datetime import datetime
import numpy as np
import torch.nn.functional as F
from torch.optim.lr_scheduler import ReduceLROnPlateau
import seaborn as sns
from sklearn.metrics import confusion_matrix, cohen_kappa_score
import pandas as pd
from torchmetrics import Precision, Recall, F1Score
import json

## Loading the data using the existing tools
from torchvision.datasets import ImageFolder
from torchvision import transforms

In [3]:
### Set up logging configuration
def setup_logging():
    # Create logs directory if it doesn't exist
    if not os.path.exists('logs'):
        os.makedirs('logs')
    
    # Create a timestamp for the log file
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_filename = f'logs/data_loading_{timestamp}.log'
    
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_filename),
            logging.StreamHandler()  # This will also print to console
        ]
    )
    return logging.getLogger(__name__)

# Initialize logger
logger = setup_logging()


In [4]:
# Define the transforms
## Transformation based on the X-ray:
### this step will add some additional Noises/distorition
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(20),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Resize((128, 128)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Base path for the dataset
base_path = "/Users/mega_potato/Downloads/Side_Project/CNN_Toy/data/Digital Knee X-ray Images/Digital Knee X-ray Images/Knee X-ray Images"

# Paths for both expert assessments
expert1_path = os.path.join(base_path, "MedicalExpert-I/MedicalExpert-I")
expert2_path = os.path.join(base_path, "MedicalExpert-II/MedicalExpert-II")

# Load both datasets
try:
    dataset_expert1 = ImageFolder(
        expert1_path,
        transform=train_transform,
    )
    logger.info(f"Expert 1 Dataset loaded successfully")
    logger.info(f"Expert 1 classes: {dataset_expert1.classes}")
    logger.info(f"Expert 1 class to index mapping: {dataset_expert1.class_to_idx}")
    logger.info(f"Expert 1 total samples: {len(dataset_expert1)}")

    dataset_expert2 = ImageFolder(
        expert2_path,
        transform=train_transform,
    )
    logger.info(f"Expert 2 Dataset loaded successfully")
    logger.info(f"Expert 2 classes: {dataset_expert2.classes}")
    logger.info(f"Expert 2 class to index mapping: {dataset_expert2.class_to_idx}")
    logger.info(f"Expert 2 total samples: {len(dataset_expert2)}")

except Exception as e:
    logger.error(f"Error loading datasets: {str(e)}")

def print_dataset_info(dataset, expert_name):
    """Helper function to print and compare dataset information"""
    logger.info(f"\n{expert_name} Dataset Information:")
    logger.info("-------------------------")
    
    # Get sample image and label
    image, label = next(iter(dataset))
    logger.info(f"Sample image shape: {image.shape}")
    logger.info(f"Sample label: {label} (Class: {dataset.classes[label]})")
    
    # Get class distribution
    class_counts = {dataset.classes[i]: 0 for i in range(len(dataset.classes))}
    for _, label in dataset:
        class_counts[dataset.classes[label]] += 1
    
    logger.info("\nClass distribution:")
    for class_name, count in class_counts.items():
        logger.info(f"{class_name}: {count} images")

# Print information for both datasets
print_dataset_info(dataset_expert1, "Medical Expert 1")
print_dataset_info(dataset_expert2, "Medical Expert 2")

# Optional: Check if the class labels match between experts
if dataset_expert1.classes == dataset_expert2.classes:
    logger.info("\nBoth experts use the same class labels")
else:
    logger.info("\nWarning: Experts use different class labels:")
    logger.info(f"Expert 1: {dataset_expert1.classes}")
    logger.info(f"Expert 2: {dataset_expert2.classes}")

2025-05-18 17:45:46,140 - INFO - Expert 1 Dataset loaded successfully
2025-05-18 17:45:46,141 - INFO - Expert 1 classes: ['0Normal', '1Doubtful', '2Mild', '3Moderate', '4Severe']
2025-05-18 17:45:46,142 - INFO - Expert 1 class to index mapping: {'0Normal': 0, '1Doubtful': 1, '2Mild': 2, '3Moderate': 3, '4Severe': 4}
2025-05-18 17:45:46,143 - INFO - Expert 1 total samples: 1650
2025-05-18 17:45:46,154 - INFO - Expert 2 Dataset loaded successfully
2025-05-18 17:45:46,154 - INFO - Expert 2 classes: ['0Normal', '1Doubtful', '2Mild', '3Moderate', '4Severe']
2025-05-18 17:45:46,154 - INFO - Expert 2 class to index mapping: {'0Normal': 0, '1Doubtful': 1, '2Mild': 2, '3Moderate': 3, '4Severe': 4}
2025-05-18 17:45:46,155 - INFO - Expert 2 total samples: 1650
2025-05-18 17:45:46,155 - INFO - 
Medical Expert 1 Dataset Information:
2025-05-18 17:45:46,155 - INFO - -------------------------
2025-05-18 17:45:46,172 - INFO - Sample image shape: torch.Size([3, 128, 128])
2025-05-18 17:45:46,173 - INFO

In [None]:
## Testing module for reading ##

In [None]:
####### Testing the dataset loading #########################################################
def test_dataset_loading(dataset, expert_name):
    """
    Test function to verify dataset loading and display key information
    """
    logger.info(f"\nTesting {expert_name} dataset...")
    
    # 1. Test dataset size
    assert len(dataset) > 0, "Dataset is empty!"
    logger.info(f"✓ Dataset contains {len(dataset)} samples")
    
    # 2. Test if we can access an image and its label
    sample_img, sample_label = dataset[0]
    logger.info(f"✓ First image shape: {sample_img.shape}")
    logger.info(f"✓ First image label: {dataset.classes[sample_label]}")
    
    # 3. Verify image dimensions
    assert sample_img.shape == (3, 128, 128), f"Unexpected image shape: {sample_img.shape}"
    logger.info("✓ Image dimensions are correct (3, 128, 128)")
    
    # 4. Print class distribution
    class_counts = {dataset.classes[i]: 0 for i in range(len(dataset.classes))}
    for _, label in dataset:
        class_counts[dataset.classes[label]] += 1
    
    logger.info("\nClass distribution:")
    for class_name, count in class_counts.items():
        logger.info(f"{class_name}: {count} images")
    
    logger.info(f"\n{expert_name} dataset testing completed successfully! ✓")
    return True

# Run tests for both datasets
try:
    test_dataset_loading(dataset_expert1, "Medical Expert 1")
    test_dataset_loading(dataset_expert2, "Medical Expert 2")
except Exception as e:
    logger.error(f"Dataset testing failed: {str(e)}")

#### END OF TESTING ############################################################################

In [7]:
###### Creating the model #####################################################################
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class AttentionBlock(nn.Module):
    def __init__(self, in_channels):
        super().__init__()
        self.attention = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // 8, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(in_channels // 8, in_channels, kernel_size=1),
            nn.Sigmoid()
        )
    
    def forward(self, x):
        attention_weights = self.attention(x)
        return x * attention_weights

class OrdinalRegressionLoss(nn.Module):
    def __init__(self, num_classes, weights=None):
        super().__init__()
        self.num_classes = num_classes
        # Weights for each boundary between classes
        self.weights = weights if weights is not None else torch.ones(num_classes - 1)
    
    def forward(self, predictions, targets):
        # Convert targets to ordinal encoding
        ordinal_targets = torch.zeros(targets.size(0), self.num_classes - 1)
        for i in range(targets.size(0)):
            ordinal_targets[i, :targets[i]] = 1
        
        ordinal_targets = ordinal_targets.to(predictions.device)
        
        # Calculate binary cross entropy for each ordinal level
        loss = F.binary_cross_entropy_with_logits(
            predictions, ordinal_targets, 
            reduction='none'
        )
        
        # Apply class weights
        weighted_loss = loss * self.weights.to(predictions.device)
        return weighted_loss.mean()

class ImprovedNet(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        
        # Initial convolution with larger kernel
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        # Residual layers with attention
        self.layer1 = nn.Sequential(
            ResidualBlock(64, 64),
            ResidualBlock(64, 64),
            AttentionBlock(64)
        )
        self.layer2 = nn.Sequential(
            ResidualBlock(64, 128, stride=2),
            ResidualBlock(128, 128),
            AttentionBlock(128)
        )
        self.layer3 = nn.Sequential(
            ResidualBlock(128, 256, stride=2),
            ResidualBlock(256, 256),
            AttentionBlock(256)
        )
        
        # Global average pooling and classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(256, num_classes - 1)  # One less output for ordinal regression
        
        # Initialize weights
        self._initialize_weights()
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
    
    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)
        
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.fc(x)
        return x

In [8]:
# For this example, we'll use Expert 1's dataset
# Split dataset into train and test sets
from torch.utils.data import random_split, DataLoader

# Set random seed for reproducibility
torch.manual_seed(42)

# Calculate split sizes (80% train, 20% test)
total_size = len(dataset_expert1)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

# Split the dataset
train_dataset, test_dataset = random_split(dataset_expert1, [train_size, test_size])

# Create data loaders
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

logger.info(f"\nDataset split:")
logger.info(f"Total samples: {total_size}")
logger.info(f"Training samples: {len(train_dataset)}")
logger.info(f"Test samples: {len(test_dataset)}")

2025-05-18 17:48:15,508 - INFO - 
Dataset split:
2025-05-18 17:48:15,509 - INFO - Total samples: 1650
2025-05-18 17:48:15,509 - INFO - Training samples: 1320
2025-05-18 17:48:15,510 - INFO - Test samples: 330


In [9]:
# Set up the weights based on the impacts:
# Initialize model with clinical priorities
# Weights for boundaries between classes (4 boundaries for 5 classes)
class_weights = torch.tensor([2.0, 1.5, 1.5, 2.0])  # Adjusted for ordinal boundaries
criterion = OrdinalRegressionLoss(num_classes=5, weights=class_weights)
net = ImprovedNet(num_classes=5)
optimizer = optim.AdamW(net.parameters(), lr=0.001, weight_decay=0.01)
scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=3)

In [10]:
# Set device
device = torch.device('cpu')
net = net.to(device)

def evaluate_clinical_metrics(model, data_loader, device, class_names):
    """
    Evaluate model with emphasis on clinical priorities
    """
    model.eval()
    all_preds = []
    all_labels = []
    
    # Initialize metrics
    precision = Precision(task="multiclass", num_classes=5, average=None).to(device)
    recall = Recall(task="multiclass", num_classes=5, average=None).to(device)
    f1 = F1Score(task="multiclass", num_classes=5, average=None).to(device)
    
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            
            # Convert ordinal outputs to class predictions
            probs = torch.sigmoid(outputs)
            preds = torch.sum(probs > 0.5, dim=1)
            preds = torch.clamp(preds, 0, 4)  # Ensure predictions are within valid range
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
            # Update metrics
            precision(preds, labels)
            recall(preds, labels)
            f1(preds, labels)
    
    # Convert to numpy arrays
    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    
    # Calculate confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    cm_normalized = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    
    # Calculate Cohen's Kappa with quadratic weights
    kappa = cohen_kappa_score(all_labels, all_preds, weights='quadratic')
    
    # Calculate Mean Absolute Error
    mae = np.mean(np.abs(all_preds - all_labels))
    
    # Get per-class metrics
    precision_values = precision.compute()
    recall_values = recall.compute()
    f1_values = f1.compute()
    
    # Create detailed metrics dictionary
    metrics = {
        'per_class_metrics': {
            class_name: {
                'precision': float(precision_values[i]),
                'recall': float(recall_values[i]),
                'f1': float(f1_values[i]),
                'support': int(np.sum(all_labels == i))
            } for i, class_name in enumerate(class_names)
        },
        'confusion_matrix': cm.tolist(),
        'normalized_confusion_matrix': cm_normalized.tolist(),
        'cohen_kappa': float(kappa),
        'mae': float(mae),
        'clinical_priorities': {
            'severe_recall': float(recall_values[4]),  # KL4/Severe
            'normal_precision': float(precision_values[0]),  # KL0/Normal
            'moderate_metrics': {
                'recall': float(recall_values[3]),  # KL3/Moderate
                'precision': float(precision_values[3])
            }
        }
    }
    
    # Calculate Clinical Utility Score (weighted average of priority metrics)
    clinical_utility = (
        0.4 * metrics['clinical_priorities']['severe_recall'] +
        0.3 * metrics['clinical_priorities']['normal_precision'] +
        0.2 * metrics['clinical_priorities']['moderate_metrics']['recall'] +
        0.1 * metrics['clinical_priorities']['moderate_metrics']['precision']
    )
    metrics['clinical_utility_score'] = float(clinical_utility)
    
    return metrics

In [11]:
## 
def plot_confusion_matrix(cm, class_names, title='Confusion Matrix', normalize=False):
    """
    Plot confusion matrix with emphasis on critical misclassifications
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='.2f' if normalize else 'd',
                xticklabels=class_names, yticklabels=class_names)
    plt.title(title)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    
    # Save the plot
    plt.savefig(f'confusion_matrix{"_normalized" if normalize else ""}.png')
    plt.close()

In [13]:
# Training loop with clinical evaluation
num_epochs = 20
best_clinical_utility = 0
logger.info("\nStarting training with clinical priorities...")

for epoch in range(num_epochs):
    # Training phase
    net.train()
    running_loss = 0.0
    
    for batch_idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=1.0)
        
        optimizer.step()
        
        running_loss += loss.item()
        if batch_idx % 50 == 49:
            logger.info(f'[Epoch {epoch + 1}, Batch {batch_idx + 1}] loss: {running_loss / 50:.3f}')
            running_loss = 0.0
    
    # Evaluation phase
    metrics = evaluate_clinical_metrics(net, test_loader, device, dataset_expert1.classes)
    
    # Log results
    logger.info(f'\nEpoch {epoch + 1} Clinical Metrics:')
    logger.info(f'Clinical Utility Score: {metrics["clinical_utility_score"]:.3f}')
    logger.info(f'Severe (KL4) Recall: {metrics["clinical_priorities"]["severe_recall"]:.3f}')
    logger.info(f'Normal (KL0) Precision: {metrics["clinical_priorities"]["normal_precision"]:.3f}')
    
    # Update learning rate based on clinical utility score
    scheduler.step(metrics['clinical_utility_score'])
    
    # Save best model based on clinical utility
    if metrics['clinical_utility_score'] > best_clinical_utility:
        best_clinical_utility = metrics['clinical_utility_score']
        torch.save({
            'epoch': epoch,
            'model_state_dict': net.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'metrics': metrics,
            'clinical_utility_score': best_clinical_utility
        }, 'best_clinical_model.pth')
        
        # Save detailed metrics to JSON
        with open('best_model_metrics.json', 'w') as f:
            json.dump(metrics, f, indent=4)
        
        # Plot confusion matrices
        plot_confusion_matrix(np.array(metrics['confusion_matrix']), 
                            dataset_expert1.classes, 
                            title='Best Model Confusion Matrix')
        plot_confusion_matrix(np.array(metrics['normalized_confusion_matrix']), 
                            dataset_expert1.classes, 
                            title='Best Model Normalized Confusion Matrix',
                            normalize=True)
        
        logger.info(f'\nNew best model saved with clinical utility score: {best_clinical_utility:.3f}')

logger.info("\nTraining completed!")
logger.info(f"Best clinical utility score: {best_clinical_utility:.3f}")

#### END OF TRAINING AND EVALUATION ############################################################################

2025-05-18 18:00:21,521 - INFO - 
Starting training with clinical priorities...
2025-05-18 18:00:43,337 - INFO - 
Epoch 1 Clinical Metrics:
2025-05-18 18:00:43,338 - INFO - Clinical Utility Score: 0.098
2025-05-18 18:00:43,338 - INFO - Severe (KL4) Recall: 0.000
2025-05-18 18:00:43,338 - INFO - Normal (KL0) Precision: 0.328
2025-05-18 18:00:43,504 - INFO - 
New best model saved with clinical utility score: 0.098
2025-05-18 18:01:04,818 - INFO - 
Epoch 2 Clinical Metrics:
2025-05-18 18:01:04,818 - INFO - Clinical Utility Score: 0.116
2025-05-18 18:01:04,818 - INFO - Severe (KL4) Recall: 0.000
2025-05-18 18:01:04,819 - INFO - Normal (KL0) Precision: 0.387
2025-05-18 18:01:04,972 - INFO - 
New best model saved with clinical utility score: 0.116
2025-05-18 18:01:26,047 - INFO - 
Epoch 3 Clinical Metrics:
2025-05-18 18:01:26,047 - INFO - Clinical Utility Score: 0.110
2025-05-18 18:01:26,048 - INFO - Severe (KL4) Recall: 0.000
2025-05-18 18:01:26,048 - INFO - Normal (KL0) Precision: 0.368
20

In [14]:
###### Model Evaluation Metrics #####################################################################
from torchmetrics import Precision, Recall

def evaluate_test_metrics(net, test_loader, device, num_classes=5):
    """
    Evaluate model performance on test set using precision and recall metrics
    """
    # Set model to evaluation mode
    net.eval()
    
    # Initialize metrics for test set evaluation
    test_precision_micro = Precision(task="multiclass", num_classes=num_classes, average="micro").to(device)
    test_precision_macro = Precision(task="multiclass", num_classes=num_classes, average="macro").to(device)
    test_precision_none = Precision(task="multiclass", num_classes=num_classes, average=None).to(device)
    
    test_recall_micro = Recall(task="multiclass", num_classes=num_classes, average="micro").to(device)
    test_recall_macro = Recall(task="multiclass", num_classes=num_classes, average="macro").to(device)
    test_recall_none = Recall(task="multiclass", num_classes=num_classes, average=None).to(device)
    
    # Evaluate on test set only
    with torch.no_grad():
        for test_images, test_labels in test_loader:
            test_images = test_images.to(device)
            test_labels = test_labels.to(device)
            
            # Forward pass
            test_outputs = net(test_images)
            _, test_preds = torch.max(test_outputs, 1)
            
            # Update metrics
            test_precision_micro(test_preds, test_labels)
            test_precision_macro(test_preds, test_labels)
            test_precision_none(test_preds, test_labels)
            
            test_recall_micro(test_preds, test_labels)
            test_recall_macro(test_preds, test_labels)
            test_recall_none(test_preds, test_labels)
    
    # Compute final test metrics
    test_micro_precision = test_precision_micro.compute()
    test_macro_precision = test_precision_macro.compute()
    test_none_precision = test_precision_none.compute()
    
    test_micro_recall = test_recall_micro.compute()
    test_macro_recall = test_recall_macro.compute()
    test_none_recall = test_recall_none.compute()
    
    # Log test results
    logger.info("\nTest Set Evaluation Metrics:")
    
    logger.info("\nTest Micro-averaging metrics:")
    logger.info(f"Precision: {test_micro_precision:.3f}")
    logger.info(f"Recall: {test_micro_recall:.3f}")
    
    logger.info("\nTest Macro-averaging metrics:")
    logger.info(f"Precision: {test_macro_precision:.3f}")
    logger.info(f"Recall: {test_macro_recall:.3f}")
    
    logger.info("\nTest Per-class metrics:")
    for i, class_name in enumerate(dataset_expert1.classes):
        logger.info(f"\nClass: {class_name}")
        logger.info(f"Precision: {test_none_precision[i]:.3f}")
        logger.info(f"Recall: {test_none_recall[i]:.3f}")

# Run evaluation on test set
logger.info("\nEvaluating model metrics on test set...")
evaluate_test_metrics(net, test_loader, device)

#### END OF MODEL EVALUATION METRICS ##################################################################

2025-05-18 18:19:37,917 - INFO - 
Evaluating model metrics on test set...
2025-05-18 18:19:40,082 - INFO - 
Test Set Evaluation Metrics:
2025-05-18 18:19:40,082 - INFO - 
Test Micro-averaging metrics:
2025-05-18 18:19:40,082 - INFO - Precision: 0.309
2025-05-18 18:19:40,083 - INFO - Recall: 0.309
2025-05-18 18:19:40,083 - INFO - 
Test Macro-averaging metrics:
2025-05-18 18:19:40,083 - INFO - Precision: 0.062
2025-05-18 18:19:40,083 - INFO - Recall: 0.200
2025-05-18 18:19:40,084 - INFO - 
Test Per-class metrics:
2025-05-18 18:19:40,084 - INFO - 
Class: 0Normal
2025-05-18 18:19:40,084 - INFO - Precision: 0.309
2025-05-18 18:19:40,084 - INFO - Recall: 1.000
2025-05-18 18:19:40,084 - INFO - 
Class: 1Doubtful
2025-05-18 18:19:40,085 - INFO - Precision: 0.000
2025-05-18 18:19:40,085 - INFO - Recall: 0.000
2025-05-18 18:19:40,085 - INFO - 
Class: 2Mild
2025-05-18 18:19:40,085 - INFO - Precision: 0.000
2025-05-18 18:19:40,086 - INFO - Recall: 0.000
2025-05-18 18:19:40,086 - INFO - 
Class: 3Mod

In [None]:
############################ END OF THE SCRIPT ##########