# Medical Bill Detection Model Training

This notebook trains an Ultralytics YOLO11 (nano, `yolo11n`) object-detection model to locate the following elements in medical bills:
- Date of Receipt
- GSTIN
- Invoice Number
- Mobile Number
- Product Table
- Store Address
- Store Name
- Total Amount

Training runs for up to 100 epochs (early stopping with a patience of 50 epochs may end it sooner) and is followed by comprehensive performance metrics and visualizations.

## 1. Setup Environment and Dependencies

First, we'll import all necessary libraries and check GPU availability.

In [None]:
import os
import yaml
from ultralytics import YOLO
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import torch

# Report the runtime so it is obvious up front whether training will use a GPU.
print(f"PyTorch version: {torch.__version__}")
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
if cuda_available:
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

# Set the working directory to the dataset folder.
# A notebook kernel defines no __file__, so the dataset folder cannot be derived
# from the notebook's own location; it is taken to be the directory the kernel
# was started in. Every relative path used later (data.yaml, train/, valid/,
# test/) resolves against this directory.
DATASET_DIR = Path.cwd()
os.chdir(DATASET_DIR)
if not (DATASET_DIR / 'data.yaml').is_file():
    # Fail early and visibly instead of with a bare FileNotFoundError in the next cell.
    print(f"WARNING: data.yaml not found in {DATASET_DIR}; "
          "start the notebook from the dataset folder.")

## 2. Load and Explore Dataset

Let's analyze our dataset configuration and structure.

In [None]:
# Load dataset configuration
with open('data.yaml', 'r', encoding='utf-8') as file:
    dataset_config = yaml.safe_load(file)

# Ultralytics dataset YAMLs may declare `names` either as a list or as an
# {index: name} mapping. Normalise to a list ordered by index so that every
# later `enumerate(dataset_config['names'])` yields (class id, class name).
# NOTE(review): assumes mapping keys are the contiguous ids 0..nc-1 - confirm.
class_name_config = dataset_config['names']
if isinstance(class_name_config, dict):
    dataset_config['names'] = [class_name_config[key] for key in sorted(class_name_config)]

print("Dataset Configuration:")
print(f"Number of classes: {dataset_config['nc']}")
print("\nClasses:")
for idx, name in enumerate(dataset_config['names']):
    print(f"{idx}: {name}")

# Count number of images in each split
def count_images(path, extensions=('.jpg', '.jpeg', '.png')):
    """Return the number of image files directly inside *path*.

    Args:
        path: Directory to scan (not recursive).
        extensions: File-name suffixes that count as images. Matching is
            case-insensitive, so ``photo.JPG`` is counted as well.

    Returns:
        The number of regular files in *path* whose name ends with one of
        *extensions*.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    with os.scandir(path) as entries:
        return sum(
            1
            for entry in entries
            # Skip sub-directories that merely happen to be named like an image.
            if entry.is_file() and entry.name.lower().endswith(suffixes)
        )

# Image folders of the three splits, relative to the dataset directory.
train_path, valid_path, test_path = (
    os.path.join(split_name, 'images') for split_name in ('train', 'valid', 'test')
)

# Report how many images each split contains.
print("\nDataset Split:")
for split_label, split_dir in (
    ("Training", train_path),
    ("Validation", valid_path),
    ("Test", test_path),
):
    print(f"{split_label} images: {count_images(split_dir)}")

## 3. Configure and Train YOLO11 Model

Now we'll set up the YOLO11n model and train it on our medical bill detection task for up to 100 epochs (early stopping patience: 50 epochs).

In [None]:
# Start from the pretrained YOLO11n checkpoint (auto-downloaded when missing)
model = YOLO('yolo11n.pt')

# The dataset folder is the current directory; hand training an absolute data.yaml path
current_dir = os.getcwd()
data_yaml_path = os.path.join(current_dir, 'data.yaml')

for status_line in (
    f"Current directory: {current_dir}",
    f"Data YAML path: {data_yaml_path}",
    f"Data YAML exists: {os.path.exists(data_yaml_path)}",
    f"All results will be saved in: {os.path.join(current_dir, 'training_results')}",
):
    print(status_line)

# Training settings, collected in one place
train_settings = {
    'data': data_yaml_path,        # absolute path to data.yaml
    'epochs': 100,                 # number of epochs
    'imgsz': 640,                  # image size
    'batch': 16,                   # batch size
    'device': 0 if torch.cuda.is_available() else 'cpu',  # first GPU when present, else CPU
    'workers': 0,                  # 0 for Windows compatibility (avoids multiprocessing issues)
    'patience': 50,                # early stopping patience
    'project': current_dir,        # save inside the dataset directory
    'name': 'training_results',    # experiment (sub-folder) name
    'exist_ok': True,              # overwrite an existing experiment
}

# Train the model
results = model.train(**train_settings)

## 4. View Training Results

Let's check the final training metrics.

In [None]:
import pandas as pd

# Load the results CSV.
# Training was launched with project=current_dir and name='training_results',
# so the run artefacts (results.csv, weights/) live in that folder.
results_csv_path = Path(current_dir) / 'training_results' / 'results.csv'
if results_csv_path.exists():
    df = pd.read_csv(results_csv_path)
    # Some Ultralytics releases pad the CSV header cells with spaces; strip them
    # so the column lookups below work regardless of the installed version.
    df.columns = df.columns.str.strip()

    # Get the last epoch (final results)
    last_row = df.iloc[-1]

    print("\n" + "="*60)
    print("TRAINING COMPLETED - FINAL METRICS")
    print("="*60)
    print(f"Total Epochs Trained: {int(last_row['epoch'])}")
    print("\nFinal Performance Metrics:")
    print(f"  Precision (B): {last_row['metrics/precision(B)']:.4f}")
    print(f"  Recall (B):    {last_row['metrics/recall(B)']:.4f}")
    print(f"  mAP50 (B):     {last_row['metrics/mAP50(B)']:.4f}")
    print(f"  mAP50-95 (B):  {last_row['metrics/mAP50-95(B)']:.4f}")

    print("\nFinal Training Losses:")
    print(f"  Box Loss:  {last_row['train/box_loss']:.4f}")
    print(f"  Class Loss: {last_row['train/cls_loss']:.4f}")
    print(f"  DFL Loss:   {last_row['train/dfl_loss']:.4f}")

    print("\nFinal Validation Losses:")
    print(f"  Box Loss:  {last_row['val/box_loss']:.4f}")
    print(f"  Class Loss: {last_row['val/cls_loss']:.4f}")
    print(f"  DFL Loss:   {last_row['val/dfl_loss']:.4f}")

    # Find the epoch with the highest mAP50. Read the epoch number from the
    # 'epoch' column instead of assuming it equals row index + 1, so it stays
    # consistent with "Total Epochs Trained" above.
    best_row_label = df['metrics/mAP50(B)'].idxmax()
    best_epoch = int(df.loc[best_row_label, 'epoch'])
    best_map50 = df.loc[best_row_label, 'metrics/mAP50(B)']
    print(f"\nBest Epoch: {best_epoch} (mAP50: {best_map50:.4f})")
    print("="*60)
else:
    print("Results CSV not found. Training may not have completed yet.")

## 5. Visualize Training Progress

Let's visualize how the model improved over the training epochs.

In [None]:
if results_csv_path.exists():
    # One entry per subplot: (title, y-axis label, [(CSV column, legend label, colour), ...]).
    # The order matches the 2x2 grid read row by row.
    panels = [
        ('Mean Average Precision (mAP)', 'mAP Score', [
            ('metrics/mAP50(B)', 'mAP50', '#2E86AB'),
            ('metrics/mAP50-95(B)', 'mAP50-95', '#A23B72'),
        ]),
        ('Precision & Recall', 'Score', [
            ('metrics/precision(B)', 'Precision', '#06A77D'),
            ('metrics/recall(B)', 'Recall', '#F18F01'),
        ]),
        ('Training Losses', 'Loss', [
            ('train/box_loss', 'Box Loss', '#E63946'),
            ('train/cls_loss', 'Class Loss', '#457B9D'),
            ('train/dfl_loss', 'DFL Loss', '#2A9D8F'),
        ]),
        ('Validation Losses', 'Loss', [
            ('val/box_loss', 'Val Box Loss', '#E63946'),
            ('val/cls_loss', 'Val Class Loss', '#457B9D'),
            ('val/dfl_loss', 'Val DFL Loss', '#2A9D8F'),
        ]),
    ]

    # Create a figure with one subplot per metric group
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Training Progress Over Epochs', fontsize=16, fontweight='bold')

    for ax, (title, y_label, curves) in zip(axes.flat, panels):
        for column, legend_label, colour in curves:
            ax.plot(df['epoch'], df[column], label=legend_label, linewidth=2, color=colour)
        ax.set_xlabel('Epoch', fontsize=12)
        ax.set_ylabel(y_label, fontsize=12)
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # The check mark was previously stored as mis-decoded UTF-8 bytes.
    print("\n✓ Training progress visualizations displayed successfully!")

## 6. Validate Model Performance on Validation Set

Let's load the best trained model and evaluate it on the validation set to get detailed per-class metrics.

In [None]:
def metrics_by_class_id(box):
    """Map class id -> (precision, recall, mAP50, mAP50-95) for evaluated classes.

    Ultralytics stores the per-class arrays (``p``, ``r``, ``ap50``, ``ap``) in
    the order given by ``ap_class_index``; classes without any instance in the
    evaluated split are left out. Position ``i`` in those arrays therefore
    belongs to class ``ap_class_index[i]``, not to class ``i``.
    """
    return {
        int(class_id): (
            float(box.p[position]),
            float(box.r[position]),
            float(box.ap50[position]),
            float(box.ap[position]),
        )
        for position, class_id in enumerate(box.ap_class_index)
    }


# Load the best trained model.
# Training saved its run to <current_dir>/training_results (project/name of
# model.train), so the weights and all derived reports live there.
best_model_path = Path(current_dir) / 'training_results' / 'weights' / 'best.pt'
results_dir = best_model_path.parent.parent  # run folder that contains weights/

if best_model_path.exists():
    print("Loading best model for validation...")
    best_model = YOLO(str(best_model_path))

    # Run validation
    print("\nRunning validation on validation set...")
    val_results = best_model.val(
        data=data_yaml_path,
        split='val',
        batch=16,
        imgsz=640,
        device=0 if torch.cuda.is_available() else 'cpu',
        verbose=True
    )

    print("\n" + "="*80)
    print("VALIDATION RESULTS ON VALIDATION SET")
    print("="*80)

    # Overall metrics
    print("\nOverall Metrics:")
    print(f"  Box Precision:    {val_results.box.p.mean():.4f}")
    print(f"  Box Recall:       {val_results.box.r.mean():.4f}")
    print(f"  mAP50:            {val_results.box.map50:.4f}")
    print(f"  mAP50-95:         {val_results.box.map:.4f}")

    # Per-class metrics
    print("\nPer-Class Metrics:")
    print(f"{'Class':<20} {'Precision':<12} {'Recall':<12} {'mAP50':<12} {'mAP50-95':<12}")
    print("-" * 80)

    # `names` may be a list or an {index: name} mapping; work with a list so
    # that position == class id in every later cell.
    class_names = dataset_config['names']
    if isinstance(class_names, dict):
        class_names = [class_names[key] for key in sorted(class_names)]

    val_by_class = metrics_by_class_id(val_results.box)
    for idx, class_name in enumerate(class_names):
        if idx in val_by_class:  # classes absent from the split have no metrics
            precision, recall, map50, map50_95 = val_by_class[idx]
            print(f"{class_name:<20} {precision:<12.4f} {recall:<12.4f} {map50:<12.4f} {map50_95:<12.4f}")

    print("="*80)

    # Save validation metrics to CSV (classes without metrics are written as 0)
    absent = (0.0, 0.0, 0.0, 0.0)
    val_metrics_data = {'class': class_names}
    for position, column in enumerate(('precision', 'recall', 'mAP50', 'mAP50-95')):
        val_metrics_data[column] = [
            val_by_class.get(idx, absent)[position] for idx in range(len(class_names))
        ]

    val_metrics_df = pd.DataFrame(val_metrics_data)
    val_metrics_csv = results_dir / 'validation_metrics.csv'
    val_metrics_df.to_csv(val_metrics_csv, index=False)
    print(f"\nValidation metrics saved to: {val_metrics_csv}")

else:
    print(f"Best model not found at {best_model_path}")

## 7. Test Model Performance on Test Set

Now let's evaluate the model on the held-out test set to measure real-world performance.

In [None]:
def metrics_by_class_id(box):
    """Map class id -> (precision, recall, mAP50, mAP50-95) for evaluated classes.

    Ultralytics stores the per-class arrays (``p``, ``r``, ``ap50``, ``ap``) in
    the order given by ``ap_class_index``; classes without any instance in the
    evaluated split are left out. Position ``i`` in those arrays therefore
    belongs to class ``ap_class_index[i]``, not to class ``i``.
    """
    return {
        int(class_id): (
            float(box.p[position]),
            float(box.r[position]),
            float(box.ap50[position]),
            float(box.ap[position]),
        )
        for position, class_id in enumerate(box.ap_class_index)
    }


if best_model_path.exists():
    # Reports are written next to the run's weights/ folder.
    results_dir = best_model_path.parent.parent

    print("Running evaluation on test set...")

    # Run validation on test split
    test_results = best_model.val(
        data=data_yaml_path,
        split='test',
        batch=16,
        imgsz=640,
        device=0 if torch.cuda.is_available() else 'cpu',
        verbose=True
    )

    print("\n" + "="*80)
    print("TEST RESULTS ON TEST SET (UNSEEN DATA)")
    print("="*80)

    # Overall metrics
    print("\nOverall Metrics:")
    print(f"  Box Precision:    {test_results.box.p.mean():.4f}")
    print(f"  Box Recall:       {test_results.box.r.mean():.4f}")
    print(f"  mAP50:            {test_results.box.map50:.4f}")
    print(f"  mAP50-95:         {test_results.box.map:.4f}")

    # Per-class metrics
    print("\nPer-Class Test Metrics:")
    print(f"{'Class':<20} {'Precision':<12} {'Recall':<12} {'mAP50':<12} {'mAP50-95':<12}")
    print("-" * 80)

    test_by_class = metrics_by_class_id(test_results.box)
    for idx, class_name in enumerate(class_names):
        if idx in test_by_class:  # classes absent from the split have no metrics
            precision, recall, map50, map50_95 = test_by_class[idx]
            print(f"{class_name:<20} {precision:<12.4f} {recall:<12.4f} {map50:<12.4f} {map50_95:<12.4f}")

    print("="*80)

    # Save test metrics to CSV (classes without metrics are written as 0)
    absent = (0.0, 0.0, 0.0, 0.0)
    test_metrics_data = {'class': class_names}
    for position, column in enumerate(('precision', 'recall', 'mAP50', 'mAP50-95')):
        test_metrics_data[column] = [
            test_by_class.get(idx, absent)[position] for idx in range(len(class_names))
        ]

    test_metrics_df = pd.DataFrame(test_metrics_data)
    test_metrics_csv = results_dir / 'test_metrics.csv'
    test_metrics_df.to_csv(test_metrics_csv, index=False)
    print(f"\nTest metrics saved to: {test_metrics_csv}")

else:
    print(f"Best model not found at {best_model_path}")

## 9. Compare Validation vs Test Performance

Let's compare how the model performs on validation vs test set to check for overfitting.

In [None]:
def metrics_by_class_id(box):
    """Map class id -> (precision, recall, mAP50, mAP50-95) for evaluated classes.

    Ultralytics stores the per-class arrays (``p``, ``r``, ``ap50``, ``ap``) in
    the order given by ``ap_class_index``; classes without any instance in the
    evaluated split are left out. Position ``i`` in those arrays therefore
    belongs to class ``ap_class_index[i]``, not to class ``i``.
    """
    return {
        int(class_id): (
            float(box.p[position]),
            float(box.r[position]),
            float(box.ap50[position]),
            float(box.ap[position]),
        )
        for position, class_id in enumerate(box.ap_class_index)
    }


if best_model_path.exists():
    # Reports are written next to the run's weights/ folder.
    results_dir = best_model_path.parent.parent

    val_by_class = metrics_by_class_id(val_results.box)
    test_by_class = metrics_by_class_id(test_results.box)
    absent = (0.0, 0.0, 0.0, 0.0)  # used for classes missing from a split
    class_ids = range(len(class_names))

    def metric_column(per_class, position):
        """Return one metric (by tuple position) for every class id, 0 when absent."""
        return [per_class.get(class_id, absent)[position] for class_id in class_ids]

    # Create comparison dataframe
    comparison_data = {
        'Class': class_names,
        'Val_Precision': metric_column(val_by_class, 0),
        'Test_Precision': metric_column(test_by_class, 0),
        'Val_Recall': metric_column(val_by_class, 1),
        'Test_Recall': metric_column(test_by_class, 1),
        'Val_mAP50': metric_column(val_by_class, 2),
        'Test_mAP50': metric_column(test_by_class, 2),
    }

    comparison_df = pd.DataFrame(comparison_data)

    # Plot comparison: one grouped bar chart per metric
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    fig.suptitle('Validation vs Test Performance Comparison', fontsize=16, fontweight='bold')

    metrics_to_plot = [
        ('Precision', 'Val_Precision', 'Test_Precision'),
        ('Recall', 'Val_Recall', 'Test_Recall'),
        ('mAP50', 'Val_mAP50', 'Test_mAP50')
    ]

    x = np.arange(len(class_names))
    width = 0.35

    for ax, (metric_name, val_col, test_col) in zip(axes, metrics_to_plot):
        ax.bar(x - width/2, comparison_df[val_col], width, label='Validation',
               color='#2E86AB', alpha=0.8)
        ax.bar(x + width/2, comparison_df[test_col], width, label='Test',
               color='#06A77D', alpha=0.8)

        ax.set_xlabel('Class', fontsize=11)
        ax.set_ylabel(metric_name, fontsize=11)
        ax.set_title(f'{metric_name} Comparison', fontsize=13, fontweight='bold')
        ax.set_xticks(x)
        ax.set_xticklabels(class_names, rotation=45, ha='right', fontsize=9)
        ax.legend(fontsize=10)
        ax.grid(True, alpha=0.3, axis='y')
        ax.set_ylim([0, 1.05])

    plt.tight_layout()
    plt.show()

    # Display comparison table
    print("\n" + "="*120)
    print("VALIDATION vs TEST PERFORMANCE COMPARISON")
    print("="*120)
    print(comparison_df.to_string(index=False))
    print("="*120)

    # Calculate and display overall comparison
    print("\nOverall Performance Summary:")
    print(f"{'Metric':<20} {'Validation':<15} {'Test':<15} {'Difference':<15}")
    print("-" * 65)

    metrics_summary = [
        ('Precision', val_results.box.p.mean(), test_results.box.p.mean()),
        ('Recall', val_results.box.r.mean(), test_results.box.r.mean()),
        ('mAP50', val_results.box.map50, test_results.box.map50),
        ('mAP50-95', val_results.box.map, test_results.box.map)
    ]

    for metric_name, val_score, test_score in metrics_summary:
        # Positive difference: the test split scored higher than validation.
        diff_str = f"{test_score - val_score:+.4f}"
        print(f"{metric_name:<20} {val_score:<15.4f} {test_score:<15.4f} {diff_str:<15}")

    print("-" * 65)

    # Save comparison to CSV
    comparison_csv = results_dir / 'validation_vs_test_comparison.csv'
    comparison_df.to_csv(comparison_csv, index=False)
    print(f"\nComparison metrics saved to: {comparison_csv}")

else:
    print(f"Cannot perform comparison without validation and test results")

## 10. Generate Comprehensive Model Report

Create a final summary report with all key information about the trained model.

In [None]:
import json
from datetime import datetime


def metrics_by_class_id(box):
    """Map class id -> (precision, recall, mAP50, mAP50-95) for evaluated classes.

    Ultralytics stores the per-class arrays (``p``, ``r``, ``ap50``, ``ap``) in
    the order given by ``ap_class_index``; classes without any instance in the
    evaluated split are left out. Position ``i`` in those arrays therefore
    belongs to class ``ap_class_index[i]``, not to class ``i``.
    """
    return {
        int(class_id): (
            float(box.p[position]),
            float(box.r[position]),
            float(box.ap50[position]),
            float(box.ap[position]),
        )
        for position, class_id in enumerate(box.ap_class_index)
    }


def overall_metrics(box):
    """Return the split-level precision, recall, mAP50 and mAP50-95 as plain floats."""
    return {
        'precision': float(box.p.mean()),
        'recall': float(box.r.mean()),
        'mAP50': float(box.map50),
        'mAP50-95': float(box.map),
    }


if best_model_path.exists():
    # Reports are written next to the run's weights/ folder.
    results_dir = best_model_path.parent.parent

    # Count each split once and reuse the numbers for the total.
    n_train = count_images(train_path)
    n_valid = count_images(valid_path)
    n_test = count_images(test_path)

    # Create comprehensive report
    report = {
        'model_info': {
            'model_type': 'YOLOv11n',
            'task': 'Medical Bill Detection',
            'num_classes': dataset_config['nc'],
            'class_names': dataset_config['names'],
            # NOTE: this is the moment the report is generated, not when training ran.
            'training_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'best_model_path': str(best_model_path),
            'last_model_path': str(best_model_path.parent / 'last.pt')
        },
        # Mirrors the arguments passed to model.train(); keep the two in sync.
        'training_config': {
            'epochs': int(last_row['epoch']),
            'image_size': 640,
            'batch_size': 16,
            'device': 'GPU' if torch.cuda.is_available() else 'CPU',
            'patience': 50
        },
        'dataset_info': {
            'train_images': n_train,
            'validation_images': n_valid,
            'test_images': n_test,
            'total_images': n_train + n_valid + n_test
        },
        'final_training_metrics': {
            'precision': float(last_row['metrics/precision(B)']),
            'recall': float(last_row['metrics/recall(B)']),
            'mAP50': float(last_row['metrics/mAP50(B)']),
            'mAP50-95': float(last_row['metrics/mAP50-95(B)']),
            'box_loss': float(last_row['train/box_loss']),
            'class_loss': float(last_row['train/cls_loss']),
            'dfl_loss': float(last_row['train/dfl_loss'])
        },
        'validation_metrics': {
            'overall': overall_metrics(val_results.box),
            'per_class': {}
        },
        'test_metrics': {
            'overall': overall_metrics(test_results.box),
            'per_class': {}
        }
    }

    # Add per-class metrics; a class that is absent from a split gets no entry there.
    metric_keys = ('precision', 'recall', 'mAP50', 'mAP50-95')
    for section, box in (('validation_metrics', val_results.box),
                         ('test_metrics', test_results.box)):
        by_class = metrics_by_class_id(box)
        for idx, class_name in enumerate(class_names):
            if idx in by_class:
                report[section]['per_class'][class_name] = dict(zip(metric_keys, by_class[idx]))

    # Save report as JSON
    report_json_path = results_dir / 'model_report.json'
    with open(report_json_path, 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)

    # Print formatted report
    print("\n" + "="*80)
    print(" " * 25 + "MODEL TRAINING REPORT")
    print("="*80)

    print("\nMODEL INFORMATION")
    print("-" * 80)
    print(f"  Model Type:           {report['model_info']['model_type']}")
    print(f"  Task:                 {report['model_info']['task']}")
    print(f"  Number of Classes:    {report['model_info']['num_classes']}")
    print(f"  Training Date:        {report['model_info']['training_date']}")

    print("\nTRAINING CONFIGURATION")
    print("-" * 80)
    print(f"  Epochs:               {report['training_config']['epochs']}")
    print(f"  Image Size:           {report['training_config']['image_size']}")
    print(f"  Batch Size:           {report['training_config']['batch_size']}")
    print(f"  Device:               {report['training_config']['device']}")

    print("\nDATASET INFORMATION")
    print("-" * 80)
    print(f"  Training Images:      {report['dataset_info']['train_images']}")
    print(f"  Validation Images:    {report['dataset_info']['validation_images']}")
    print(f"  Test Images:          {report['dataset_info']['test_images']}")
    print(f"  Total Images:         {report['dataset_info']['total_images']}")

    print("\nFINAL PERFORMANCE METRICS")
    print("-" * 80)
    print(f"{'Dataset':<15} {'Precision':<12} {'Recall':<12} {'mAP50':<12} {'mAP50-95':<12}")
    print("-" * 80)
    for split_label, section in (('Validation', 'validation_metrics'), ('Test', 'test_metrics')):
        overall = report[section]['overall']
        print(f"{split_label:<15} {overall['precision']:<12.4f} "
              f"{overall['recall']:<12.4f} "
              f"{overall['mAP50']:<12.4f} "
              f"{overall['mAP50-95']:<12.4f}")

    print("\nSAVED FILES")
    print("-" * 80)
    print(f"  Best Model:           {report['model_info']['best_model_path']}")
    print(f"  Training Results:     {results_csv_path}")
    print(f"  Validation Metrics:   {val_metrics_csv}")
    print(f"  Test Metrics:         {test_metrics_csv}")
    print(f"  Comparison:           {comparison_csv}")
    print(f"  Model Report (JSON):  {report_json_path}")

    print("\n" + "="*80)
    print("TRAINING, VALIDATION, AND TESTING COMPLETED SUCCESSFULLY!")
    print("="*80)

    print(f"\nComprehensive model report saved to: {report_json_path}")

else:
    print("Cannot generate report without model and results")