In [1]:
!pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.3.170-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  

In [2]:
import os
from pathlib import Path
import shutil
import yaml
import random

import torch
from ultralytics import YOLO    # <-- only this is required

from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch.nn as nn
import torch.optim as optim


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [3]:
from pathlib import Path
import shutil
import os

# ✅ Define paths
ROOT = Path("/kaggle/input/welding-defect-object-detection/The Welding Defect Dataset - v2/The Welding Defect Dataset - v2")
WORK = Path("/kaggle/working/welding-data")

# ✅ Recreate working directory (optional: clean start)
if WORK.exists():
    shutil.rmtree(WORK)
os.makedirs(WORK, exist_ok=True)

# ✅ Copy train/valid/test folders (images + labels)
for split in ("train", "valid", "test"):
    src_img = ROOT / split / "images"
    src_lbl = ROOT / split / "labels"
    
    dst_img = WORK / split / "images"
    dst_lbl = WORK / split / "labels"
    
    shutil.copytree(src_img, dst_img)
    shutil.copytree(src_lbl, dst_lbl)

print("✅ Data successfully copied to working directory!")


✅ Data successfully copied to working directory!


In [4]:
updated_yaml = """
train: /kaggle/working/welding-data/train/images
val: /kaggle/working/welding-data/valid/images
test: /kaggle/working/welding-data/test/images

nc: 3
names: ['Bad Weld', 'Good Weld', 'Defect']
"""

with open("/kaggle/working/welding-data/data.yaml", "w") as f:
    f.write(updated_yaml.strip())

print("✅ data.yaml updated and saved in working directory.")


✅ data.yaml updated and saved in working directory.


In [5]:
from ultralytics import YOLO

model = YOLO("yolov8n.pt")  # or yolov8s.pt, yolov8m.pt etc.

model.train(data="/kaggle/working/welding-data/data.yaml", epochs=20, imgsz=416, batch=8)


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 84.9MB/s]


Ultralytics 8.3.170 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/kaggle/working/welding-data/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=416, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True,

100%|██████████| 755k/755k [00:00<00:00, 16.8MB/s]


Overriding model.yaml nc=80 with nc=3

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics

100%|██████████| 5.35M/5.35M [00:00<00:00, 76.2MB/s]


[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1175.9±480.5 MB/s, size: 45.7 KB)


[34m[1mtrain: [0mScanning /kaggle/working/welding-data/train/labels... 1619 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1619/1619 [00:01<00:00, 1271.66it/s]

[34m[1mtrain: [0mNew cache created: /kaggle/working/welding-data/train/labels.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 493.3±179.3 MB/s, size: 51.7 KB)


[34m[1mval: [0mScanning /kaggle/working/welding-data/valid/labels... 283 images, 0 backgrounds, 0 corrupt: 100%|██████████| 283/283 [00:00<00:00, 1153.41it/s]

[34m[1mval: [0mNew cache created: /kaggle/working/welding-data/valid/labels.cache





Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 416 train, 416 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20     0.514G      2.051      2.968       1.65         19        416: 100%|██████████| 203/203 [00:18<00:00, 10.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.29it/s]

                   all        283        802      0.452      0.192      0.112     0.0462






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20     0.629G       2.02      2.582      1.671         24        416: 100%|██████████| 203/203 [00:16<00:00, 12.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 10.92it/s]


                   all        283        802      0.127       0.31      0.134     0.0519

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20     0.645G      1.978      2.457      1.643         17        416: 100%|██████████| 203/203 [00:16<00:00, 12.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.11it/s]

                   all        283        802      0.539      0.275      0.199     0.0882






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20     0.662G      1.963      2.415      1.628         10        416: 100%|██████████| 203/203 [00:16<00:00, 12.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 11.99it/s]

                   all        283        802      0.463      0.312      0.259       0.12






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      0.68G      1.926      2.307       1.62         11        416: 100%|██████████| 203/203 [00:16<00:00, 12.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.16it/s]


                   all        283        802      0.395      0.324      0.249      0.116

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20     0.695G       1.87      2.281      1.584          2        416: 100%|██████████| 203/203 [00:16<00:00, 12.61it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.55it/s]

                   all        283        802      0.327      0.367      0.265      0.127






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20     0.713G      1.853      2.176      1.566          7        416: 100%|██████████| 203/203 [00:16<00:00, 12.58it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.31it/s]

                   all        283        802      0.429      0.336      0.324      0.166






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20      0.73G      1.812      2.107      1.525         10        416: 100%|██████████| 203/203 [00:16<00:00, 12.61it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 11.94it/s]


                   all        283        802       0.65      0.359      0.335      0.163

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20     0.748G      1.776      2.067      1.502         14        416: 100%|██████████| 203/203 [00:16<00:00, 12.61it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.34it/s]

                   all        283        802       0.38      0.413      0.341       0.17






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20     0.754G       1.75      2.009       1.49          8        416: 100%|██████████| 203/203 [00:16<00:00, 12.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.35it/s]

                   all        283        802      0.372      0.476       0.34      0.166





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20     0.783G      1.754      1.991      1.569         13        416: 100%|██████████| 203/203 [00:16<00:00, 12.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.18it/s]

                   all        283        802      0.512      0.385      0.394      0.206






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20     0.799G      1.724      1.912      1.562          5        416: 100%|██████████| 203/203 [00:15<00:00, 12.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 11.60it/s]

                   all        283        802      0.483      0.477      0.427      0.233






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20     0.814G      1.696      1.872       1.53          8        416: 100%|██████████| 203/203 [00:16<00:00, 12.66it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 11.99it/s]

                   all        283        802      0.463      0.507       0.44      0.233






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20     0.832G      1.666      1.802      1.519          5        416: 100%|██████████| 203/203 [00:15<00:00, 12.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.15it/s]

                   all        283        802       0.46      0.501      0.442      0.238






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20     0.852G       1.65      1.737      1.488         17        416: 100%|██████████| 203/203 [00:16<00:00, 12.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.36it/s]

                   all        283        802      0.543      0.486      0.476      0.263






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20     0.865G      1.614      1.667      1.462          8        416: 100%|██████████| 203/203 [00:15<00:00, 12.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.86it/s]

                   all        283        802      0.512       0.54      0.505      0.281






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20     0.885G      1.564      1.622      1.446         10        416: 100%|██████████| 203/203 [00:16<00:00, 12.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 11.93it/s]


                   all        283        802      0.624      0.489       0.51       0.29

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20     0.891G      1.544      1.594      1.428          8        416: 100%|██████████| 203/203 [00:16<00:00, 12.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.53it/s]

                   all        283        802      0.678      0.492      0.536      0.309






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20     0.918G      1.525      1.539      1.412          8        416: 100%|██████████| 203/203 [00:16<00:00, 12.58it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.83it/s]

                   all        283        802       0.56      0.557      0.555      0.315






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20     0.934G      1.489      1.494      1.383          4        416: 100%|██████████| 203/203 [00:16<00:00, 12.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:01<00:00, 12.50it/s]

                   all        283        802       0.46      0.621      0.541      0.318






20 epochs completed in 0.101 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train/weights/best.pt, 6.2MB

Validating runs/detect/train/weights/best.pt...
Ultralytics 8.3.170 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 3,006,233 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 18/18 [00:02<00:00,  7.26it/s]
  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        283        802      0.456      0.622      0.541      0.318
              Bad Weld        141        194      0.493      0.809      0.683      0.434
             Good Weld        175        335      0.485      0.812      0.694      0.419
                Defect        128        273      0.389      0.245      0.248      0.101
Speed: 0.1ms preprocess, 2.6ms inference, 0.0ms loss, 2.8ms postprocess per image
Results saved to [1mruns/detect/train[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7a54f543da10>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.04

In [6]:
from pathlib import Path
import os
import shutil
import random
import torch
from ultralytics import YOLO

from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image # Ensure PIL is imported for image loading
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np # For numerical operations, especially with multi-hot encoding



In [7]:
# --- 3. Data Preparation for Classification (Multi-Label) ---
# This section reorganizes the welding defect dataset from YOLO's object detection
# format (images + labels with bounding boxes) into a classification format
# suitable for multi-label ResNet18 training.
import collections

# Define your data transformations
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),  # Resize all images to 224x224
        transforms.RandomHorizontalFlip(), # Example augmentation
        transforms.ToTensor(),             # Convert images to PyTorch tensors
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Normalize with ImageNet stats
    ]),
    'valid': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
}
SRC = Path("/kaggle/working/welding-data") # Source directory for YOLO-formatted data
DST = Path("/kaggle/working/classification_data") # Destination directory for classification-formatted data
# No need to create subfolders for each class with ImageFolder anymore, but still need base DST.
os.makedirs(DST, exist_ok=True) # Create the destination base directory

# Define the category names matching the class indices in the YOLO labels.
categories = ["Bad Weld", "Good Weld", "Defect"]
# Store a list of (image_path, label_file_path, split) tuples
all_image_info = collections.defaultdict(list) # To store info for each split

# We will collect counts for each class directly from the label files
# This is crucial for calculating pos_weight for BCEWithLogitsLoss
multi_label_class_presence_counts = collections.defaultdict(int)

print("\nPreparing classification data by organizing image info and collecting multi-label counts...")

for split in ['train', 'valid', 'test']:
    split_img_dir = SRC / split / "images"
    split_lbl_dir = SRC / split / "labels"

    # We need to explicitly copy images for the classification dataset if they are not already.
    # We will create a flat structure for the classification data in `DST/split/`.
    split_dst_dir = DST / split
    split_dst_dir.mkdir(parents=True, exist_ok=True)

    for img_file in split_img_dir.glob("*.jpg"):
        label_file = split_lbl_dir / (img_file.stem + ".txt")

        if not label_file.exists():
            continue # Skip images without labels

        # Copy image to the classification data directory for this split
        shutil.copy(img_file, split_dst_dir / img_file.name)
        
        # Store information for the custom dataset
        all_image_info[split].append({
            'image_path': str(split_dst_dir / img_file.name),
            'label_path': str(label_file)
        })

        # Count class presences for calculating pos_weights
        if split == 'train':
            with open(label_file, 'r') as f:
                lines = f.readlines()
            # Get unique class IDs present in this image
            classes_in_this_image = list(set(int(line.split()[0]) for line in lines if line.strip()))
            for cls_idx in classes_in_this_image:
                if 0 <= cls_idx < len(categories):
                    multi_label_class_presence_counts[cls_idx] += 1
                else:
                    print(f"Warning: Class index {cls_idx} out of range for {img_file.name}. Skipping class.")

print("✅ Multi-label classification data info collected.")
print("Raw training multi-label class presence counts:", dict(multi_label_class_presence_counts))


class CustomMultiLabelDataset(Dataset):
    def __init__(self, image_info_list, categories, transform=None):
        self.image_info = image_info_list
        self.categories = categories
        self.transform = transform
        self.num_classes = len(categories)

    def __len__(self):
        return len(self.image_info)

    def __getitem__(self, idx):
        item_info = self.image_info[idx]
        image_path = item_info['image_path']
        label_path = item_info['label_path']

        # Load image
        image = Image.open(image_path).convert('RGB')

        # Create multi-hot encoded label
        label_tensor = torch.zeros(self.num_classes, dtype=torch.float32) # Use float32 for BCEWithLogitsLoss
        
        # Read class IDs from YOLO label file
        class_ids_in_image = set()
        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line in f:
                    if line.strip():
                        class_id = int(line.split()[0])
                        if 0 <= class_id < self.num_classes:
                            class_ids_in_image.add(class_id)
        
        for class_id in class_ids_in_image:
            label_tensor[class_id] = 1.0 # Set to 1.0 for present classes

        # Apply transforms
        if self.transform:
            image = self.transform(image)

        return image, label_tensor

# Define CLASSIFICATION_DATA_DIR for clarity, as it's used below
CLASSIFICATION_DATA_DIR = DST # /kaggle/working/classification_data

# Create CustomMultiLabelDataset instances
image_datasets = {
    x: CustomMultiLabelDataset(
        image_info_list=all_image_info[x],
        categories=categories,
        transform=data_transforms[x]
    ) for x in ['train', 'valid', 'test']
}

# Create DataLoaders
dataloaders = {x: DataLoader(image_datasets[x],
                             batch_size=32,
                             shuffle=True if x == 'train' else False,
                             num_workers=2)
               for x in ['train', 'valid', 'test']}

# Get dataset sizes (number of images, not total defect instances)
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid', 'test']}
# Class names are defined globally
class_names = categories # Use your defined categories list as class_names
print("\nClasses (corresponding to class indices 0, 1, 2...):", class_names)
print("Dataset sizes per split (number of images):", dataset_sizes)


Preparing classification data by organizing image info and collecting multi-label counts...
✅ Multi-label classification data info collected.
Raw training multi-label class presence counts: {1: 1019, 0: 785, 2: 776}

Classes (corresponding to class indices 0, 1, 2...): ['Bad Weld', 'Good Weld', 'Defect']
Dataset sizes per split (number of images): {'train': 1619, 'valid': 283, 'test': 126}


In [8]:
# --- 4. Calculate Class Weights for Imbalanced Dataset (Multi-Label Classification) ---
# This section computes `pos_weight` for `nn.BCEWithLogitsLoss`.
# For each class, pos_weight helps to balance precision and recall by
# weighting positive examples of that class more if they are rare.

# Convert class presence counts from defaultdict to a list, ensuring order by class index
class_presence_list = [multi_label_class_presence_counts[i] for i in range(len(categories))]
print("Class presence counts (ordered by index):", class_presence_list)

# Calculate `pos_weight` for BCEWithLogitsLoss
# pos_weight[i] = (total_negative_samples_for_class_i / total_positive_samples_for_class_i)
# `dataset_sizes['train']` is the total number of images.
# `class_presence_list[i]` is the number of images where class `i` is present.
pos_weights = []
for i, positive_count in enumerate(class_presence_list):
    total_images = dataset_sizes['train']
    if positive_count == 0:
        # If a class has no positive samples, its pos_weight is undefined or very high.
        # For practical purposes, you might assign 1.0 or a very large number, or exclude it if unused.
        # Here, we'll assign 1.0, meaning no special weighting.
        pos_weights.append(1.0)
    else:
        negative_count = total_images - positive_count
        weight = negative_count / positive_count
        pos_weights.append(weight)

class_weights_tensor = torch.tensor(pos_weights, dtype=torch.float32)

print("Calculated Class Weights (pos_weight for BCEWithLogitsLoss):", class_weights_tensor)

Class presence counts (ordered by index): [785, 1019, 776]
Calculated Class Weights (pos_weight for BCEWithLogitsLoss): tensor([1.0624, 0.5888, 1.0863])


In [9]:
# ... (existing code for model loading, freezing parameters, replacing model.fc) ...

# Move the entire model to the chosen device (GPU or CPU).

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.resnet18(pretrained=True)

# Replace the last FC layer for 3 classes
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 3)

model = model.to(device)

# Define Loss Function and Optimizer
# criterion: BCEWithLogitsLoss is suitable for multi-label classification.
# We pass `pos_weight` calculated earlier to this criterion.
# This makes the loss function penalize misclassifications of rare classes more heavily when they are positive.
criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights_tensor.to(device)) # <--- CHANGED: Multi-label Loss

# ... (rest of the code for optimizer and scheduler remains the same) ...

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 161MB/s]


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
optimizer = optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 5

# Assuming you have your model, dataloaders, and device defined
# model = YourMultiLabelModel().to(device)
# dataloaders = {'train': your_train_dataloader, ...}
# criterion = nn.BCEWithLogitsLoss() # Typically for multi-label classification
# optimizer = optim.Adam(model.parameters(), lr=1e-4)
# num_epochs = 5

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    model.train()
    running_loss = 0.0
    correct_predictions = 0 # Renamed for clarity
    total_samples = 0     # Renamed for clarity

    for inputs, labels in dataloaders['train']:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels.float()) # Ensure labels are float for BCEWithLogitsLoss
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # --- MODIFIED ACCURACY CALCULATION FOR MULTI-LABEL ---
        # Apply a sigmoid to the outputs to get probabilities (if using BCEWithLogitsLoss)
        # Then, threshold the probabilities to get binary predictions (e.g., 0.5)
        predicted_labels = (torch.sigmoid(outputs) > 0.5).float()

        # For multi-label, 'correct' is typically calculated per label, then averaged.
        # Here, we're calculating the number of exactly matched predictions across all labels for each sample.
        # You might want to use more sophisticated metrics like F1-score, precision, recall for multi-label.
        correct_predictions += (predicted_labels == labels).all(dim=1).sum().item()
        total_samples += labels.size(0) # Number of images in the batch
        # ---------------------------------------------------

    # Calculate overall accuracy
    accuracy = 100 * correct_predictions / total_samples if total_samples > 0 else 0
    print(f"Train Loss: {running_loss:.4f}, Accuracy: {accuracy:.2f}%")

Epoch 1/5
Train Loss: 21.3780, Accuracy: 55.03%
Epoch 2/5
Train Loss: 10.6345, Accuracy: 81.47%
Epoch 3/5
Train Loss: 6.3494, Accuracy: 89.38%
Epoch 4/5
Train Loss: 4.0524, Accuracy: 93.76%
Epoch 5/5
Train Loss: 2.4335, Accuracy: 96.66%


In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
# from sklearn.metrics import f1_score, precision_score, recall_score, average_precision_score # Optional, for more advanced metrics

# Assuming these are already defined in your script:
# model = YourMultiLabelModel().to(device)
# dataloaders = {'train': your_train_dataloader, 'valid': your_valid_dataloader, 'test': your_test_dataloader}
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# criterion = nn.BCEWithLogitsLoss()
# optimizer = optim.Adam(model.parameters(), lr=1e-4)
# num_epochs = 5
# dataset_sizes = {'train': len(dataloaders['train'].dataset), 'valid': len(dataloaders['valid'].dataset), 'test': len(dataloaders['test'].dataset)}
# class_names = ['class1', 'class2', 'class3'] # Example, replace with your actual class names or count

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    # Each epoch has a training and validation phase
    for phase in ['train', 'valid']: # Add 'test' if you want to evaluate on test set in each epoch
        if phase == 'train':
            model.train()  # Set model to training mode
        else:
            model.eval()   # Set model to evaluate mode (for validation/test)

        running_loss = 0.0
        running_corrects_exact_match = 0 # For exact match accuracy per image
        running_corrects_per_label = 0   # For accuracy of individual binary predictions

        # Lists to store predictions and true labels for more advanced metrics (optional)
        # all_preds = []
        # all_labels = []

        # Iterate over data
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device) # Labels should be float for BCEWithLogitsLoss

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward
            # Track history if only in train
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs) # Get model predictions (logits)
                loss = criterion(outputs, labels.float()) # Ensure labels are float

                # Get binary predictions for accuracy calculation
                preds_binary = (torch.sigmoid(outputs) > 0.5).float() # Threshold at 0.5

                # Backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            # Statistics
            running_loss += loss.item() * inputs.size(0) # Accumulate loss weighted by batch size

            # Calculate "exact match" accuracy (all labels for a sample must be correct)
            running_corrects_exact_match += (preds_binary == labels).all(dim=1).sum().item()

            # Calculate "accuracy per label" (individual correct binary predictions)
            # This is often more useful for multi-label classification
            running_corrects_per_label += (preds_binary == labels).float().sum().item()

            # Optionally, collect predictions and labels for more metrics later
            # all_preds.append(preds_binary.cpu())
            # all_labels.append(labels.cpu())

        # Epoch statistics
        epoch_loss = running_loss / dataset_sizes[phase]

        # Calculate exact match accuracy
        epoch_acc_exact_match = running_corrects_exact_match / dataset_sizes[phase]

        # Calculate accuracy per label
        # Divide by total number of *individual label predictions*
        total_individual_predictions = dataset_sizes[phase] * len(class_names) # Or labels.size(1) if labels.size(1) is consistent
        epoch_acc_per_label = running_corrects_per_label / total_individual_predictions if total_individual_predictions > 0 else 0


        print(f'{phase} Loss: {epoch_loss:.4f} Exact Match Acc: {epoch_acc_exact_match:.4f} Per-Label Acc: {epoch_acc_per_label:.4f}')

        # Save best model (optional, typically based on validation metric)
        # if phase == 'valid' and epoch_acc_per_label > best_acc:
        #     best_acc = epoch_acc_per_label
        #     torch.save(model.state_dict(), 'best_model.pth')

    print("-" * 20) # Separator for epochs

# Final evaluation on test set after training (optional, but good practice)
# model.eval()
# test_running_loss = 0.0
# test_correct_exact_match = 0
# test_correct_per_label = 0
# for inputs, labels in dataloaders['test']:
#     inputs, labels = inputs.to(device), labels.to(device)
#     with torch.no_grad():
#         outputs = model(inputs)
#         loss = criterion(outputs, labels.float())
#         preds_binary = (torch.sigmoid(outputs) > 0.5).float()
#     test_running_loss += loss.item() * inputs.size(0)
#     test_correct_exact_match += (preds_binary == labels).all(dim=1).sum().item()
#     test_correct_per_label += (preds_binary == labels).float().sum().item()

# test_loss = test_running_loss / dataset_sizes['test']
# test_acc_exact_match = test_correct_exact_match / dataset_sizes['test']
# test_total_individual_predictions = dataset_sizes['test'] * len(class_names)
# test_acc_per_label = test_correct_per_label / test_total_individual_predictions if test_total_individual_predictions > 0 else 0

# print(f"Test Loss: {test_loss:.4f} Test Exact Match Acc: {test_acc_exact_match:.4f} Test Per-Label Acc: {test_acc_per_label:.4f}")

Epoch 1/5
train Loss: 0.0366 Exact Match Acc: 0.9821 Per-Label Acc: 0.9930
valid Loss: 0.4368 Exact Match Acc: 0.7420 Per-Label Acc: 0.8575
--------------------
Epoch 2/5
train Loss: 0.0298 Exact Match Acc: 0.9771 Per-Label Acc: 0.9914
valid Loss: 0.4097 Exact Match Acc: 0.7420 Per-Label Acc: 0.8622
--------------------
Epoch 3/5
train Loss: 0.0213 Exact Match Acc: 0.9846 Per-Label Acc: 0.9949
valid Loss: 0.3924 Exact Match Acc: 0.7562 Per-Label Acc: 0.8704
--------------------
Epoch 4/5
train Loss: 0.0160 Exact Match Acc: 0.9944 Per-Label Acc: 0.9981
valid Loss: 0.4198 Exact Match Acc: 0.7420 Per-Label Acc: 0.8622
--------------------
Epoch 5/5
train Loss: 0.0136 Exact Match Acc: 0.9932 Per-Label Acc: 0.9975
valid Loss: 0.4216 Exact Match Acc: 0.7456 Per-Label Acc: 0.8681
--------------------


In [12]:
# ... (existing code for loading best model weights) ...

test_running_corrects = 0
test_total_predictions = 0 # To count total individual label predictions

with torch.no_grad():
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        preds = torch.sigmoid(outputs) > 0.5 # <--- CHANGED: Multi-label predictions

        test_running_corrects += (preds == labels).float().sum().item() # <--- CHANGED: Multi-label accuracy calc
        test_total_predictions += labels.numel() # Total number of individual labels in this batch (batch_size * num_classes) # <--- CHANGED

# Calculate final test accuracy for multi-label
test_acc = test_running_corrects / test_total_predictions # <--- CHANGED: Multi-label accuracy calc
print(f'Test Accuracy of the best model: {test_acc:.4f}')

Test Accuracy of the best model: 0.9101


In [13]:
num_epochs = 30 # Number of training epochs
best_model_wts_path = "/kaggle/working/best_classification_model.pth" # Path to save the best model
best_acc = 0.0 # Initialize best accuracy for saving the best model
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5) # <--- THIS LINE

print("\n--- Starting Training ---")
# Iterate through each epoch
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    print("-" * 10)

    # Each epoch has a training and validation phase
    for phase in ['train', 'valid']:
        if phase == 'train':
            model.train() # Set model to training mode (enables dropout, batchnorm updates)
        else:
            model.eval() # Set model to evaluate mode (disables dropout, fixes batchnorm)

        running_loss = 0.0
        running_corrects = 0.0 # Changed to float for consistency in multi-label accuracy
        running_total_predictions = 0 # Added to track total individual label predictions

        # Iterate over data batches
        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device) # Move inputs to the device
            labels = labels.to(device) # Move labels (now multi-hot) to the device

            # Zero the parameter gradients before each batch
            optimizer.zero_grad()

            # Forward pass: track gradients only if in training phase
            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs) # Get model predictions (raw logits)
                
                # --- START Multi-Label Specific Changes ---
                preds = torch.sigmoid(outputs) > 0.5 # Apply sigmoid and threshold for multi-label predictions
                loss = criterion(outputs, labels) # BCEWithLogitsLoss expects raw outputs and float labels
                # --- END Multi-Label Specific Changes ---

                # Backward pass + optimize only if in training phase
                if phase == 'train':
                    loss.backward() # Compute gradients
                    optimizer.step() # Update model parameters

            # Statistics for the current phase
            running_loss += loss.item() * inputs.size(0) # Accumulate batch loss
            
            # --- START Multi-Label Specific Changes ---
            # For multi-label, calculate accuracy per individual label prediction
            running_corrects += (preds == labels).float().sum().item() # Sum correctly predicted individual labels
            running_total_predictions += labels.numel() # Total number of individual labels (batch_size * num_classes)
            # --- END Multi-Label Specific Changes ---

        # Calculate epoch-level loss and accuracy
        epoch_loss = running_loss / dataset_sizes[phase]
        # Accuracy is now sum of correct individual labels / total number of individual labels
        epoch_acc = running_corrects / running_total_predictions # Adjusted for multi-label accuracy

        print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        # Step the learning rate scheduler if in validation phase
        if phase == 'valid':
            scheduler.step(epoch_loss) # Update scheduler based on validation loss

            # Save the model if it's the best performing on the validation set
            if epoch_acc > best_acc:
                best_acc = epoch_acc
                torch.save(model.state_dict(), best_model_wts_path)
                print(f"New best validation accuracy: {best_acc:.4f}. Model saved to {best_model_wts_path}")

print("\n--- Training Complete ---")
print(f"Best validation accuracy achieved: {best_acc:.4f}")


--- Starting Training ---
Epoch 1/30
----------
train Loss: 0.0121 Acc: 0.9984
valid Loss: 0.4340 Acc: 0.8598
New best validation accuracy: 0.8598. Model saved to /kaggle/working/best_classification_model.pth
Epoch 2/30
----------
train Loss: 0.0092 Acc: 0.9992
valid Loss: 0.4867 Acc: 0.8645
New best validation accuracy: 0.8645. Model saved to /kaggle/working/best_classification_model.pth
Epoch 3/30
----------
train Loss: 0.0097 Acc: 0.9988
valid Loss: 0.4588 Acc: 0.8645
Epoch 4/30
----------
train Loss: 0.0076 Acc: 0.9994
valid Loss: 0.4771 Acc: 0.8610
Epoch 5/30
----------
train Loss: 0.0062 Acc: 0.9992
valid Loss: 0.4721 Acc: 0.8634
Epoch 6/30
----------
train Loss: 0.0126 Acc: 0.9971
valid Loss: 0.4875 Acc: 0.8681
New best validation accuracy: 0.8681. Model saved to /kaggle/working/best_classification_model.pth
Epoch 7/30
----------
train Loss: 0.0108 Acc: 0.9975
valid Loss: 0.5663 Acc: 0.8575
Epoch 8/30
----------
train Loss: 0.0059 Acc: 0.9994
valid Loss: 0.5303 Acc: 0.8587
Epoc

In [14]:
# --- 8. Load Best Model and Evaluate on Test Set ---
print("\n--- Evaluating Best Model on Test Set ---")
# Load the weights of the best performing model from validation
# (Assuming 'model' is already defined and moved to device from Section 6)
model.load_state_dict(torch.load(best_model_wts_path, map_location=device)) # Added map_location for robustness
model.eval() # Set model to evaluation mode

test_running_corrects = 0.0 # Changed to float
test_total_predictions = 0 # Added for multi-label accuracy

# Disable gradient calculation for evaluation to save memory and speed up computation
with torch.no_grad(): 
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        # --- START Multi-Label Specific Changes ---
        preds = torch.sigmoid(outputs) > 0.5 # Apply sigmoid and threshold for multi-label predictions
        # --- END Multi-Label Specific Changes ---

        test_running_corrects += (preds == labels).float().sum().item() # Sum correct individual labels
        test_total_predictions += labels.numel() # Total individual labels (batch_size * num_classes)

# Calculate final test accuracy
test_acc = test_running_corrects / test_total_predictions # Adjusted for multi-label accuracy
print(f'Test Accuracy of the best model: {test_acc:.4f}') # This line prints the test accuracy


--- Evaluating Best Model on Test Set ---
Test Accuracy of the best model: 0.9153
