In [3]:
!nvidia-smi

Fri Nov 29 18:51:50 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.90.07              Driver Version: 550.90.07      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          On  |   00000000:07:00.0 Off |                    0 |
| N/A   42C    P0            131W /  400W |   18119MiB /  40960MiB |    100%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA A100-SXM4-40GB          On  |   00

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint


In [None]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Define SimCLR-specific augmentations
def get_simclr_transforms():
    return transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomApply([transforms.ColorJitter()], p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

# Custom Dataset to Ignore Folder-Based Labels
class UnlabeledDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = [
            os.path.join(dp, f) for dp, dn, filenames in os.walk(root_dir) for f in filenames if f.endswith(('.png', '.jpg', '.jpeg'))
        ]

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            # Apply two transformations for contrastive views
            view1 = self.transform(image)
            view2 = self.transform(image)
            return (view1, view2), 0  # Label is dummy (not used in SimCLR)
        return image

# Dataset and DataLoader
unlabeled_dataset = UnlabeledDataset(root_dir='data/data_2', transform=get_simclr_transforms())
unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=64, shuffle=True)


In [None]:
import torch
from torch import nn, optim
import torch.nn.functional as F
import torchvision
import pytorch_lightning as pl

class SimCLR(pl.LightningModule):
    def __init__(self, hidden_dim, lr, temperature, weight_decay, max_epochs=500):
        super().__init__()
        self.save_hyperparameters()
        assert self.hparams.temperature > 0.15, "The temperature must be a positive float!"
        
        # Base encoder f(.)
        self.encoder = torchvision.models.resnet50(pretrained=False)
        self.encoder.fc = nn.Sequential(
            nn.Linear(self.encoder.fc.in_features, 4 * hidden_dim),
            nn.ReLU(inplace=True),
            nn.Linear(4 * hidden_dim, hidden_dim)
        )
        
    def forward(self, x):
        return self.encoder(x)

    def info_nce_loss(self, batch):
        (view1, view2), _ = batch
        # Concatenate views and compute features
        images = torch.cat([view1, view2], dim=0)
        features = self.encoder(images)

        # Normalize feature embeddings
        features = F.normalize(features, dim=1)

        # Compute cosine similarity
        batch_size = view1.size(0)
        similarity_matrix = torch.matmul(features, features.T)

        # Mask self-similarity
        mask = torch.eye(similarity_matrix.size(0), device=similarity_matrix.device).bool()
        similarity_matrix.masked_fill_(mask, -9e15)

        # Positive pairs
        pos_mask = mask.roll(shifts=batch_size, dims=0)
        positives = similarity_matrix[pos_mask]

        # Compute loss
        similarity_matrix /= self.hparams.temperature
        log_probs = similarity_matrix.log_softmax(dim=-1)
        loss = -log_probs[pos_mask].mean()
        return loss

    def training_step(self, batch, batch_idx):
        loss = self.info_nce_loss(batch)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams.lr, weight_decay=self.hparams.weight_decay)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=self.hparams.max_epochs)
        return [optimizer], [scheduler]


In [None]:
from pytorch_lightning import Trainer

# Initialize the model
model = SimCLR(hidden_dim=128, lr=3e-4, temperature=0.5, weight_decay=1e-4, max_epochs=100)

# Trainer
trainer = Trainer(max_epochs=100,devices=8)
trainer.fit(model, unlabeled_loader)


In [None]:
backbone_state_dict = {k: v for k, v in model.encoder.state_dict().items()}
torch.save(backbone_state_dict, "simclr.pth")


In [None]:
import torch
from torch import nn, optim
import torch.nn.functional as F
import torchvision
import pytorch_lightning as pl
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

class SimCLRFineTuning(pl.LightningModule):
    def __init__(self, num_classes, backbone_path, lr=1e-4, weight_decay=1e-4):
        super().__init__()
        self.save_hyperparameters()
        
        # Create a fresh ResNet50 backbone
        self.encoder = torchvision.models.resnet50(pretrained=False)
        
        # Load the saved SimCLR backbone state dict
        backbone_state_dict = torch.load(backbone_path)
        
        # Load the state dict to the encoder, excluding the final FC layer
        model_dict = self.encoder.state_dict()
        backbone_dict = {k: v for k, v in backbone_state_dict.items() if k in model_dict and not k.startswith("fc")}
        model_dict.update(backbone_dict)
        self.encoder.load_state_dict(model_dict)
        
        # Freeze the encoder weights
        for param in self.encoder.parameters():
            param.requires_grad = False
        
        # Replace the final FC layer with a new classification head
        self.encoder.fc = nn.Sequential(
            nn.Linear(self.encoder.fc.in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )
    
    def forward(self, x):
        return self.encoder(x)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = torch.sum(preds == y).float() / len(y)
        self.log('val_loss', loss, on_epoch=True, prog_bar=True)
        self.log('val_acc', acc, on_epoch=True, prog_bar=True)
        return loss
    
    def configure_optimizers(self):
        # Only optimize the new classification head
        optimizer = optim.Adam(
            self.encoder.fc.parameters(), 
            lr=self.hparams.lr, 
            weight_decay=self.hparams.weight_decay
        )
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 
            mode='min', 
            factor=0.1, 
            patience=3
        )
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_loss'
            }
        }


import torch
import numpy as np
from torch import nn, optim
import torch.nn.functional as F
import torchvision
import pytorch_lightning as pl
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder

class AddGaussianNoise(object):
    def __init__(self, mean=0., std=0.1, p=0.5):
        self.std = std
        self.mean = mean
        self.p = p
        
    def __call__(self, tensor):
        if np.random.random() < self.p:
            return tensor + torch.randn(tensor.size()) * self.std + self.mean
        return tensor
    
    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1}, p={2})'.format(self.mean, self.std, self.p)

class AddSaltPepperNoise(object):
    def __init__(self, salt_prob=0.05, pepper_prob=0.05, p=0.5):
        self.salt_prob = salt_prob
        self.pepper_prob = pepper_prob
        self.p = p
        
    def __call__(self, tensor):
        if np.random.random() < self.p:
            noise = torch.zeros_like(tensor)
            salt_mask = torch.rand_like(tensor) < self.salt_prob
            pepper_mask = torch.rand_like(tensor) < self.pepper_prob
            
            noise[salt_mask] = 1.0
            noise[pepper_mask] = 0.0
            
            return torch.clamp(tensor + noise, 0, 1)
        return tensor
    
    def __repr__(self):
        return self.__class__.__name__ + '(salt_prob={0}, pepper_prob={1}, p={2})'.format(
            self.salt_prob, self.pepper_prob, self.p)

def prepare_dataset(data_dir, input_size=224, batch_size=56):
    # Define enhanced transforms with additional augmentations
    train_transforms = transforms.Compose([
        # Geometric Transformations
        transforms.Resize((input_size, input_size)),
        transforms.RandomHorizontalFlip(),
        # Corrected rotation specification
        transforms.RandomRotation(degrees=(0, 180)),  # Rotation between 0 and 180 degrees
        
        # Convert to Tensor to apply noise transformations
        transforms.ToTensor(),
        
        # Pixel Transformations
        # Gaussian Noise with 50% probability
        AddGaussianNoise(mean=0.0, std=0.1, p=0.5),
        
        # Salt and Pepper Noise with 50% probability
        AddSaltPepperNoise(salt_prob=0.05, pepper_prob=0.05, p=0.5),
        
        # Normalization
        transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                             std=[0.229, 0.224, 0.225])
    ])
    
    # Validation transforms remain the same (without augmentations)
    val_transforms = transforms.Compose([
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                             std=[0.229, 0.224, 0.225])
    ])
    
    # Load datasets
    train_dataset = ImageFolder(root=data_dir, transform=train_transforms)
    val_size = len(train_dataset) // 10  # 10% validation split
    train_dataset, val_dataset = torch.utils.data.random_split(
        train_dataset, 
        [len(train_dataset) - val_size, val_size]
    )
    
    # Create dataloaders
    train_loader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=True, 
        num_workers=4
    )
    val_loader = DataLoader(
        val_dataset, 
        batch_size=batch_size, 
        shuffle=False, 
        num_workers=256
    )
    
    return train_loader, val_loader, len(train_dataset.dataset.classes)

def fine_tune_simclr(backbone_path, data_dir):
    # Prepare the dataset
    train_loader, val_loader, num_classes = prepare_dataset(data_dir)
    
    # Create fine-tuning model
    fine_tuning_model = SimCLRFineTuning(
        num_classes=num_classes,
        backbone_path=backbone_path
    )
    
    # Setup trainer
    trainer = pl.Trainer(
        max_epochs=20,
        devices=8,  # Adjust based on your GPU availability
        accelerator='gpu',
        precision=16,  # Mixed precision training
        callbacks=[
            pl.callbacks.ModelCheckpoint(
                monitor='val_acc', 
                mode='max', 
                save_top_k=1
            ),
            pl.callbacks.EarlyStopping(
                monitor='val_loss', 
                patience=5
            )
        ]
    )
    
    # Train the model
    trainer.fit(fine_tuning_model, train_loader, val_loader)
    
    return fine_tuning_model

fine_tuned_model = fine_tune_simclr(
     backbone_path='simclr.pth', 
     data_dir='dataset_final'
)

In [None]:
import torch
import pytorch_lightning as pl
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import models, datasets, transforms

# Define the model class for fine-tuning
class FineTuneResNet50(pl.LightningModule):
    def __init__(self, num_classes, pretrained_path=None, hidden_dim=512):
        super(FineTuneResNet50, self).__init__()
        
        # Initialize ResNet50 without the final fully connected layer (classification head)
        self.encoder = models.resnet50(pretrained=False)
        
        if pretrained_path:
            # Load pre-trained weights into the model (excluding the 'fc' layers)
            state_dict = torch.load(pretrained_path)
            # Remove the fc layer weights from the state_dict
            state_dict = {k: v for k, v in state_dict.items() if 'fc' not in k}
            # Load the state_dict into the model
            self.encoder.load_state_dict(state_dict, strict=False)
        
        # Get the number of input features from the last layer of ResNet50 (before replacing it)
        num_features = self.encoder.fc.in_features
        
        # Remove the fully connected layer (classification head)
        self.encoder.fc = nn.Identity()  # Identity layer to remove the classifier
        
        # Replace with new classifier (fully connected layer)
        self.classifier = nn.Sequential(
            nn.Linear(num_features, hidden_dim),  # Use the correct number of features
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes)
        )

        # Initialize the list to store validation outputs (losses)
        self.validation_outputs = []

    def forward(self, x):
        features = self.encoder(x)
        logits = self.classifier(features)
        return logits

    def training_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        return loss

    def validation_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self(images)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        self.validation_outputs.append(loss)  # Append the loss to the list
        return loss

    def on_validation_epoch_end(self):
        avg_loss = torch.stack(self.validation_outputs).mean()
        print(f'Validation loss: {avg_loss.item()}')  # Manually print the validation loss
        self.log('val_loss', avg_loss, prog_bar=True)
        self.validation_outputs.clear()

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=1e-4)
        return optimizer

# Data transformation and dataset setup
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the images to the expected input size for ResNet50
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Pretrained ResNet mean and std
])

# Load the dataset from the directory structure
dataset = datasets.ImageFolder(root="dataset_final", transform=transform)

# Correct split lengths
train_size = int(0.7 * len(dataset))  # 70% for training
val_size = int(0.2 * len(dataset))    # 20% for validation
test_size = len(dataset) - train_size - val_size  # Remaining 10% for testing

# Split the dataset into training, validation, and test sets
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# DataLoader for training, validation, and test datasets
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Initialize fine-tune model
num_classes = len(dataset.classes)  # Number of output classes
fine_tune_model = FineTuneResNet50(num_classes=num_classes, pretrained_path="simclr_backbone.pth")

# Trainer setup
trainer = pl.Trainer(max_epochs=10, devices=1)

# Training the model
trainer.fit(fine_tune_model, train_dataloader, val_dataloader)

# After training, you can evaluate on the test set
test_result = trainer.test(fine_tune_model, test_dataloader)
print(test_result)


In [7]:
import os
import torch
import torchvision
import pytorch_lightning as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    classification_report, 
    confusion_matrix, 
    precision_recall_fscore_support
)
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from torch.utils.data import DataLoader
from torchvision import transforms, models
from torchvision.datasets import ImageFolder

class ResNet50DataModule(pl.LightningDataModule):
    def __init__(self, data_dir, batch_size=32, num_workers=4):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        
        # Immediately determine number of classes
        full_dataset = ImageFolder(root=self.data_dir)
        self.class_names = full_dataset.classes
        self.num_classes = len(self.class_names)
        
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])
    
    def setup(self, stage=None):
        full_dataset = ImageFolder(root=self.data_dir, transform=self.transform)
        
        # Verify class consistency
        print("Detected Classes:", self.class_names)
        print("Total number of classes:", self.num_classes)
        
        # Split dataset into train, validation, and test sets
        train_size = int(0.7 * len(full_dataset))
        val_size = int(0.15 * len(full_dataset))
        test_size = len(full_dataset) - train_size - val_size
        
        self.train_dataset, self.val_dataset, self.test_dataset = torch.utils.data.random_split(
            full_dataset, [train_size, val_size, test_size]
        )
    
    def train_dataloader(self):
        return DataLoader(
            self.train_dataset, 
            batch_size=self.batch_size, 
            shuffle=True, 
            num_workers=self.num_workers
        )
    
    def val_dataloader(self):
        return DataLoader(
            self.val_dataset, 
            batch_size=self.batch_size, 
            shuffle=False, 
            num_workers=self.num_workers
        )
    
    def test_dataloader(self):
        return DataLoader(
            self.test_dataset, 
            batch_size=self.batch_size, 
            shuffle=False, 
            num_workers=self.num_workers
        )

class ResNet50Classifier(pl.LightningModule):
    def __init__(self, num_classes, class_names, learning_rate=1e-5):
        super().__init__()
        
        self.model = models.resnet152(pretrained=True)
        num_ftrs = self.model.fc.in_features
        self.model.fc = torch.nn.Linear(num_ftrs, num_classes)
        
        self.num_classes = num_classes
        self.class_names = class_names
        self.learning_rate = learning_rate
        self.loss = torch.nn.CrossEntropyLoss()
        
        # Store predictions and targets for evaluation
        self.test_predictions = []
        self.test_targets = []
    
    def forward(self, x):
        return self.model(x)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.loss(logits, y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = self.loss(logits, y)
        
        preds = torch.argmax(logits, dim=1)
        acc = torch.sum(preds == y).float() / len(y)
        
        self.log('val_loss', loss, on_epoch=True, prog_bar=True)
        self.log('val_accuracy', acc, on_epoch=True, prog_bar=True)
        
        return loss
    
    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        preds = torch.argmax(logits, dim=1)
        
        # Store predictions and targets for later evaluation
        self.test_predictions.extend(preds.cpu().numpy())
        self.test_targets.extend(y.cpu().numpy())
        
        return {'preds': preds, 'targets': y}
    
    def on_test_epoch_end(self):
        # Convert to numpy arrays
        predictions = np.array(self.test_predictions)
        targets = np.array(self.test_targets)
        
        # Compute detailed metrics
        precision, recall, f1, _ = precision_recall_fscore_support(
            targets, predictions, average='weighted'
        )
        
        # Detailed classification report
        print("\nClassification Report:")
        print(classification_report(
            targets, 
            predictions, 
            target_names=self.class_names,
            labels=range(len(self.class_names))
        ))
        
        # Confusion Matrix
        cm = confusion_matrix(targets, predictions)
        plt.figure(figsize=(10, 8))
        sns.heatmap(
            cm, 
            annot=True, 
            fmt='d', 
            cmap='Blues', 
            xticklabels=self.class_names,
            yticklabels=self.class_names
        )
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.tight_layout()
        plt.savefig('confusion_matrix.png')
        plt.close()
        
        # Log metrics
        self.log('test_precision', precision)
        self.log('test_recall', recall)
        self.log('test_f1_score', f1)
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 
            mode='min', 
            factor=0.1, 
            patience=3
        )
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_loss'
            }
        }

def main():
    # Set hyperparameters
    data_dir = 'dataset_final'  # Replace with your dataset directory
    batch_size = 50
    max_epochs = 50
    
    # Set random seed for reproducibility
    pl.seed_everything(42)
    
    # Prepare data module FIRST to determine num_classes
    data_module = ResNet50DataModule(
        data_dir=data_dir, 
        batch_size=batch_size
    )
    
    # Print out class names and number of classes
    print(f"Classes: {data_module.class_names}")
    print(f"Number of Classes: {data_module.num_classes}")
    
    # Initialize model with the num_classes and class_names
    model = ResNet50Classifier(
        num_classes=data_module.num_classes, 
        class_names=data_module.class_names
    )
    
    # Setup checkpointing
    checkpoint_callback = ModelCheckpoint(
        monitor='val_loss',
        mode='min',
        save_top_k=3,
        filename='resnet50-{epoch:02d}-{val_loss:.2f}'
    )
    
    # Early stopping
    early_stop_callback = EarlyStopping(
        monitor='val_loss',
        patience=20,
        verbose=True,
        mode='min'
    )
    
    # Initialize trainer
    trainer = pl.Trainer(
        max_epochs=max_epochs,
        accelerator='gpu' if torch.cuda.is_available() else 'cpu',
        callbacks=[checkpoint_callback, early_stop_callback],
        devices=8
    )
    
    # Train the model
    trainer.fit(model, datamodule=data_module)
    
    # Perform testing and evaluation
    trainer.test(model, datamodule=data_module)

if __name__ == "__main__":
    main()

Seed set to 42


Classes: ['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']
Number of Classes: 6


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('mediu

Detected Classes:Detected Classes:  Detected Classes:Detected Classes:['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']Detected Classes:['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']
   
Total number of classes:['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']Total number of classes: 

 
6Total number of classes:Detected Classes:6Total number of classes:Total number of classes: 

   66['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']6



Detected Classes:Total number of classes: Detected Classes: ['Core deformation', 'Cor

LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name  | Type             | Params | Mode 
---------------------------------------------------
0 | model | ResNet           | 58.2 M | train
1 | loss  | CrossEntropyLoss | 0      | train
---------------------------------------------------
58.2 M    Trainable params
0         Non-trainable params
58.2 M    Total params
232.624   Total estimated model params size (MB)
424       Modules in train mode
0         Modules in eval mode


Sanity Checking: |                                                                                            …

/opt/conda/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('val_loss', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/opt/conda/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('val_accuracy', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/opt/conda/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (6) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |                                                                                                   …

Validation: |                                                                                                 …

/opt/conda/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('train_loss', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
[rank: 0] Metric val_loss improved. New best score: 1.746
[rank: 6] Metric val_loss improved. New best score: 1.792
[rank: 4] Metric val_loss improved. New best score: 1.843
[rank: 2] Metric val_loss improved. New best score: 1.754
[rank: 3] Metric val_loss improved. New best score: 1.764
[rank: 5] Metric val_loss improved. New best score: 1.704
[rank: 7] Metric val_loss improved. New best score: 1.767
[rank: 1] Metric val_loss improved. New best score: 1.772


Validation: |                                                                                                 …

[rank: 5] Metric val_loss improved by 0.145 >= min_delta = 0.0. New best score: 1.559
[rank: 6] Metric val_loss improved by 0.119 >= min_delta = 0.0. New best score: 1.673
[rank: 0] Metric val_loss improved by 0.146 >= min_delta = 0.0. New best score: 1.600
[rank: 4] Metric val_loss improved by 0.114 >= min_delta = 0.0. New best score: 1.729
[rank: 2] Metric val_loss improved by 0.147 >= min_delta = 0.0. New best score: 1.607
[rank: 1] Metric val_loss improved by 0.120 >= min_delta = 0.0. New best score: 1.651
[rank: 7] Metric val_loss improved by 0.157 >= min_delta = 0.0. New best score: 1.610
[rank: 3] Metric val_loss improved by 0.138 >= min_delta = 0.0. New best score: 1.626


Validation: |                                                                                                 …

[rank: 6] Metric val_loss improved by 0.117 >= min_delta = 0.0. New best score: 1.556
[rank: 0] Metric val_loss improved by 0.133 >= min_delta = 0.0. New best score: 1.467
[rank: 1] Metric val_loss improved by 0.085 >= min_delta = 0.0. New best score: 1.566
[rank: 2] Metric val_loss improved by 0.108 >= min_delta = 0.0. New best score: 1.499
[rank: 5] Metric val_loss improved by 0.099 >= min_delta = 0.0. New best score: 1.460
[rank: 3] Metric val_loss improved by 0.105 >= min_delta = 0.0. New best score: 1.521
[rank: 4] Metric val_loss improved by 0.103 >= min_delta = 0.0. New best score: 1.626
[rank: 7] Metric val_loss improved by 0.135 >= min_delta = 0.0. New best score: 1.474


Validation: |                                                                                                 …

[rank: 6] Metric val_loss improved by 0.115 >= min_delta = 0.0. New best score: 1.440
[rank: 7] Metric val_loss improved by 0.076 >= min_delta = 0.0. New best score: 1.399
[rank: 1] Metric val_loss improved by 0.085 >= min_delta = 0.0. New best score: 1.481
[rank: 0] Metric val_loss improved by 0.134 >= min_delta = 0.0. New best score: 1.333
[rank: 3] Metric val_loss improved by 0.080 >= min_delta = 0.0. New best score: 1.441
[rank: 4] Metric val_loss improved by 0.094 >= min_delta = 0.0. New best score: 1.532
[rank: 5] Metric val_loss improved by 0.101 >= min_delta = 0.0. New best score: 1.359
[rank: 2] Metric val_loss improved by 0.103 >= min_delta = 0.0. New best score: 1.396


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.067 >= min_delta = 0.0. New best score: 1.266
[rank: 6] Metric val_loss improved by 0.070 >= min_delta = 0.0. New best score: 1.370
[rank: 3] Metric val_loss improved by 0.091 >= min_delta = 0.0. New best score: 1.350
[rank: 4] Metric val_loss improved by 0.101 >= min_delta = 0.0. New best score: 1.431
[rank: 1] Metric val_loss improved by 0.111 >= min_delta = 0.0. New best score: 1.370
[rank: 7] Metric val_loss improved by 0.070 >= min_delta = 0.0. New best score: 1.329
[rank: 5] Metric val_loss improved by 0.074 >= min_delta = 0.0. New best score: 1.285
[rank: 2] Metric val_loss improved by 0.076 >= min_delta = 0.0. New best score: 1.320


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.099 >= min_delta = 0.0. New best score: 1.166
[rank: 6] Metric val_loss improved by 0.060 >= min_delta = 0.0. New best score: 1.310
[rank: 2] Metric val_loss improved by 0.097 >= min_delta = 0.0. New best score: 1.222
[rank: 4] Metric val_loss improved by 0.078 >= min_delta = 0.0. New best score: 1.353
[rank: 7] Metric val_loss improved by 0.101 >= min_delta = 0.0. New best score: 1.229
[rank: 3] Metric val_loss improved by 0.068 >= min_delta = 0.0. New best score: 1.282
[rank: 1] Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 1.370
[rank: 5] Metric val_loss improved by 0.039 >= min_delta = 0.0. New best score: 1.246


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.064 >= min_delta = 0.0. New best score: 1.103
[rank: 2] Metric val_loss improved by 0.031 >= min_delta = 0.0. New best score: 1.191
[rank: 1] Metric val_loss improved by 0.074 >= min_delta = 0.0. New best score: 1.295
[rank: 4] Metric val_loss improved by 0.057 >= min_delta = 0.0. New best score: 1.296
[rank: 3] Metric val_loss improved by 0.042 >= min_delta = 0.0. New best score: 1.241
[rank: 7] Metric val_loss improved by 0.027 >= min_delta = 0.0. New best score: 1.201
[rank: 5] Metric val_loss improved by 0.110 >= min_delta = 0.0. New best score: 1.136
[rank: 6] Metric val_loss improved by 0.049 >= min_delta = 0.0. New best score: 1.261


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.039 >= min_delta = 0.0. New best score: 1.064
[rank: 2] Metric val_loss improved by 0.112 >= min_delta = 0.0. New best score: 1.080
[rank: 3] Metric val_loss improved by 0.074 >= min_delta = 0.0. New best score: 1.167
[rank: 1] Metric val_loss improved by 0.125 >= min_delta = 0.0. New best score: 1.171
[rank: 6] Metric val_loss improved by 0.114 >= min_delta = 0.0. New best score: 1.147
[rank: 4] Metric val_loss improved by 0.079 >= min_delta = 0.0. New best score: 1.217
[rank: 7] Metric val_loss improved by 0.067 >= min_delta = 0.0. New best score: 1.135
[rank: 5] Metric val_loss improved by 0.081 >= min_delta = 0.0. New best score: 1.055


Validation: |                                                                                                 …

[rank: 6] Metric val_loss improved by 0.086 >= min_delta = 0.0. New best score: 1.061
[rank: 4] Metric val_loss improved by 0.041 >= min_delta = 0.0. New best score: 1.177
[rank: 2] Metric val_loss improved by 0.028 >= min_delta = 0.0. New best score: 1.051
[rank: 5] Metric val_loss improved by 0.105 >= min_delta = 0.0. New best score: 0.950
[rank: 3] Metric val_loss improved by 0.063 >= min_delta = 0.0. New best score: 1.104
[rank: 0] Metric val_loss improved by 0.130 >= min_delta = 0.0. New best score: 0.933
[rank: 1] Metric val_loss improved by 0.044 >= min_delta = 0.0. New best score: 1.126
[rank: 7] Metric val_loss improved by 0.084 >= min_delta = 0.0. New best score: 1.051


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.060 >= min_delta = 0.0. New best score: 0.873
[rank: 5] Metric val_loss improved by 0.040 >= min_delta = 0.0. New best score: 0.910
[rank: 4] Metric val_loss improved by 0.111 >= min_delta = 0.0. New best score: 1.065
[rank: 6] Metric val_loss improved by 0.075 >= min_delta = 0.0. New best score: 0.986
[rank: 1] Metric val_loss improved by 0.039 >= min_delta = 0.0. New best score: 1.088
[rank: 2] Metric val_loss improved by 0.085 >= min_delta = 0.0. New best score: 0.966
[rank: 3] Metric val_loss improved by 0.086 >= min_delta = 0.0. New best score: 1.018
[rank: 7] Metric val_loss improved by 0.065 >= min_delta = 0.0. New best score: 0.986


Validation: |                                                                                                 …

[rank: 3] Metric val_loss improved by 0.061 >= min_delta = 0.0. New best score: 0.957
[rank: 4] Metric val_loss improved by 0.052 >= min_delta = 0.0. New best score: 1.013
[rank: 7] Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 0.968
[rank: 2] Metric val_loss improved by 0.061 >= min_delta = 0.0. New best score: 0.905
[rank: 0] Metric val_loss improved by 0.085 >= min_delta = 0.0. New best score: 0.789
[rank: 1] Metric val_loss improved by 0.054 >= min_delta = 0.0. New best score: 1.034
[rank: 6] Metric val_loss improved by 0.056 >= min_delta = 0.0. New best score: 0.930
[rank: 5] Metric val_loss improved by 0.102 >= min_delta = 0.0. New best score: 0.808


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.031 >= min_delta = 0.0. New best score: 0.758
[rank: 3] Metric val_loss improved by 0.088 >= min_delta = 0.0. New best score: 0.869
[rank: 4] Metric val_loss improved by 0.054 >= min_delta = 0.0. New best score: 0.959
[rank: 6] Metric val_loss improved by 0.080 >= min_delta = 0.0. New best score: 0.850
[rank: 7] Metric val_loss improved by 0.074 >= min_delta = 0.0. New best score: 0.894
[rank: 5] Metric val_loss improved by 0.058 >= min_delta = 0.0. New best score: 0.750
[rank: 2] Metric val_loss improved by 0.051 >= min_delta = 0.0. New best score: 0.855
[rank: 1] Metric val_loss improved by 0.103 >= min_delta = 0.0. New best score: 0.930


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.058 >= min_delta = 0.0. New best score: 0.700
[rank: 5] Metric val_loss improved by 0.068 >= min_delta = 0.0. New best score: 0.682
[rank: 4] Metric val_loss improved by 0.037 >= min_delta = 0.0. New best score: 0.923
[rank: 1] Metric val_loss improved by 0.046 >= min_delta = 0.0. New best score: 0.885
[rank: 2] Metric val_loss improved by 0.048 >= min_delta = 0.0. New best score: 0.807
[rank: 7] Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 0.879
[rank: 3] Metric val_loss improved by 0.073 >= min_delta = 0.0. New best score: 0.796
[rank: 6] Metric val_loss improved by 0.037 >= min_delta = 0.0. New best score: 0.812


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.050 >= min_delta = 0.0. New best score: 0.650
[rank: 3] Metric val_loss improved by 0.057 >= min_delta = 0.0. New best score: 0.739
[rank: 5] Metric val_loss improved by 0.026 >= min_delta = 0.0. New best score: 0.656
[rank: 1] Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.883
[rank: 4] Metric val_loss improved by 0.109 >= min_delta = 0.0. New best score: 0.814
[rank: 6] Metric val_loss improved by 0.062 >= min_delta = 0.0. New best score: 0.750
[rank: 2] Metric val_loss improved by 0.066 >= min_delta = 0.0. New best score: 0.741
[rank: 7] Metric val_loss improved by 0.065 >= min_delta = 0.0. New best score: 0.814


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.042 >= min_delta = 0.0. New best score: 0.608
[rank: 3] Metric val_loss improved by 0.058 >= min_delta = 0.0. New best score: 0.681
[rank: 2] Metric val_loss improved by 0.030 >= min_delta = 0.0. New best score: 0.711
[rank: 7] Metric val_loss improved by 0.032 >= min_delta = 0.0. New best score: 0.782
[rank: 6] Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 0.732
[rank: 1] Metric val_loss improved by 0.070 >= min_delta = 0.0. New best score: 0.813
[rank: 5] Metric val_loss improved by 0.021 >= min_delta = 0.0. New best score: 0.636


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.604
[rank: 4] Metric val_loss improved by 0.038 >= min_delta = 0.0. New best score: 0.776
[rank: 6] Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 0.719
[rank: 7] Metric val_loss improved by 0.065 >= min_delta = 0.0. New best score: 0.716
[rank: 3] Metric val_loss improved by 0.042 >= min_delta = 0.0. New best score: 0.638
[rank: 1] Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.809
[rank: 2] Metric val_loss improved by 0.043 >= min_delta = 0.0. New best score: 0.668
[rank: 5] Metric val_loss improved by 0.065 >= min_delta = 0.0. New best score: 0.570


Validation: |                                                                                                 …

[rank: 2] Metric val_loss improved by 0.035 >= min_delta = 0.0. New best score: 0.633
[rank: 0] Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 0.586
[rank: 7] Metric val_loss improved by 0.022 >= min_delta = 0.0. New best score: 0.694
[rank: 4] Metric val_loss improved by 0.069 >= min_delta = 0.0. New best score: 0.707
[rank: 6] Metric val_loss improved by 0.078 >= min_delta = 0.0. New best score: 0.641
[rank: 1] Metric val_loss improved by 0.065 >= min_delta = 0.0. New best score: 0.744
[rank: 3] Metric val_loss improved by 0.057 >= min_delta = 0.0. New best score: 0.581


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.072 >= min_delta = 0.0. New best score: 0.514
[rank: 1] Metric val_loss improved by 0.060 >= min_delta = 0.0. New best score: 0.683
[rank: 6] Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.636
[rank: 5] Metric val_loss improved by 0.041 >= min_delta = 0.0. New best score: 0.529
[rank: 4] Metric val_loss improved by 0.022 >= min_delta = 0.0. New best score: 0.686
[rank: 2] Metric val_loss improved by 0.046 >= min_delta = 0.0. New best score: 0.587
[rank: 3] Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.575
[rank: 7] Metric val_loss improved by 0.043 >= min_delta = 0.0. New best score: 0.651


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 0.501
[rank: 5] Metric val_loss improved by 0.037 >= min_delta = 0.0. New best score: 0.492
[rank: 2] Metric val_loss improved by 0.045 >= min_delta = 0.0. New best score: 0.542
[rank: 1] Metric val_loss improved by 0.032 >= min_delta = 0.0. New best score: 0.651
[rank: 6] Metric val_loss improved by 0.074 >= min_delta = 0.0. New best score: 0.562
[rank: 7] Metric val_loss improved by 0.025 >= min_delta = 0.0. New best score: 0.626
[rank: 4] Metric val_loss improved by 0.041 >= min_delta = 0.0. New best score: 0.644


Validation: |                                                                                                 …

[rank: 5] Metric val_loss improved by 0.062 >= min_delta = 0.0. New best score: 0.430
[rank: 2] Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 0.524
[rank: 3] Metric val_loss improved by 0.080 >= min_delta = 0.0. New best score: 0.495
[rank: 4] Metric val_loss improved by 0.022 >= min_delta = 0.0. New best score: 0.622
[rank: 7] Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 0.615


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.057 >= min_delta = 0.0. New best score: 0.444
[rank: 7] Metric val_loss improved by 0.060 >= min_delta = 0.0. New best score: 0.555
[rank: 1] Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.642
[rank: 3] Metric val_loss improved by 0.016 >= min_delta = 0.0. New best score: 0.479
[rank: 4] Metric val_loss improved by 0.115 >= min_delta = 0.0. New best score: 0.507
[rank: 2] Metric val_loss improved by 0.033 >= min_delta = 0.0. New best score: 0.491


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.046 >= min_delta = 0.0. New best score: 0.399
[rank: 5] Metric val_loss improved by 0.061 >= min_delta = 0.0. New best score: 0.369
[rank: 1] Metric val_loss improved by 0.095 >= min_delta = 0.0. New best score: 0.547
[rank: 2] Metric val_loss improved by 0.027 >= min_delta = 0.0. New best score: 0.464


Validation: |                                                                                                 …

[rank: 2] Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.452
[rank: 0] Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 0.389
[rank: 3] Metric val_loss improved by 0.022 >= min_delta = 0.0. New best score: 0.457
[rank: 6] Metric val_loss improved by 0.093 >= min_delta = 0.0. New best score: 0.469
[rank: 7] Metric val_loss improved by 0.029 >= min_delta = 0.0. New best score: 0.526


Validation: |                                                                                                 …

[rank: 7] Metric val_loss improved by 0.052 >= min_delta = 0.0. New best score: 0.474
[rank: 2] Metric val_loss improved by 0.036 >= min_delta = 0.0. New best score: 0.416
[rank: 0] Metric val_loss improved by 0.038 >= min_delta = 0.0. New best score: 0.351
[rank: 3] Metric val_loss improved by 0.036 >= min_delta = 0.0. New best score: 0.421
[rank: 4] Metric val_loss improved by 0.042 >= min_delta = 0.0. New best score: 0.465


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 0.341
[rank: 1] Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 0.524
[rank: 4] Metric val_loss improved by 0.056 >= min_delta = 0.0. New best score: 0.409
[rank: 5] Metric val_loss improved by 0.061 >= min_delta = 0.0. New best score: 0.308
[rank: 2] Metric val_loss improved by 0.070 >= min_delta = 0.0. New best score: 0.346


Validation: |                                                                                                 …

[rank: 1] Metric val_loss improved by 0.053 >= min_delta = 0.0. New best score: 0.471
[rank: 7] Metric val_loss improved by 0.049 >= min_delta = 0.0. New best score: 0.425
[rank: 6] Metric val_loss improved by 0.086 >= min_delta = 0.0. New best score: 0.383
[rank: 3] Metric val_loss improved by 0.070 >= min_delta = 0.0. New best score: 0.351


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.036 >= min_delta = 0.0. New best score: 0.304
[rank: 1] Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 0.457
[rank: 5] Metric val_loss improved by 0.026 >= min_delta = 0.0. New best score: 0.282
[rank: 4] Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.404


Validation: |                                                                                                 …

[rank: 1] Metric val_loss improved by 0.040 >= min_delta = 0.0. New best score: 0.417
[rank: 5] Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.263
[rank: 4] Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.389
[rank: 3] Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 0.333
[rank: 7] Metric val_loss improved by 0.037 >= min_delta = 0.0. New best score: 0.388


Validation: |                                                                                                 …

[rank: 5] Metric val_loss improved by 0.023 >= min_delta = 0.0. New best score: 0.241
[rank: 3] Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.333
[rank: 7] Metric val_loss improved by 0.040 >= min_delta = 0.0. New best score: 0.348
[rank: 4] Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.384
[rank: 6] Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.379


Validation: |                                                                                                 …

[rank: 3] Metric val_loss improved by 0.025 >= min_delta = 0.0. New best score: 0.308
[rank: 2] Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 0.331


Validation: |                                                                                                 …

[rank: 5] Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.237
[rank: 4] Metric val_loss improved by 0.058 >= min_delta = 0.0. New best score: 0.326
[rank: 2] Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.319
[rank: 6] Metric val_loss improved by 0.064 >= min_delta = 0.0. New best score: 0.315
[rank: 1] Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 0.404


Validation: |                                                                                                 …

[rank: 5] Metric val_loss improved by 0.033 >= min_delta = 0.0. New best score: 0.204
[rank: 1] Metric val_loss improved by 0.026 >= min_delta = 0.0. New best score: 0.378


Validation: |                                                                                                 …

[rank: 5] Metric val_loss improved by 0.011 >= min_delta = 0.0. New best score: 0.192
[rank: 6] Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 0.303


Validation: |                                                                                                 …

[rank: 7] Metric val_loss improved by 0.005 >= min_delta = 0.0. New best score: 0.343
[rank: 4] Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.322
[rank: 3] Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.301


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 0.303
[rank: 5] Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.173


Validation: |                                                                                                 …

[rank: 4] Metric val_loss improved by 0.036 >= min_delta = 0.0. New best score: 0.287
[rank: 2] Metric val_loss improved by 0.004 >= min_delta = 0.0. New best score: 0.315
[rank: 1] Metric val_loss improved by 0.003 >= min_delta = 0.0. New best score: 0.375


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.012 >= min_delta = 0.0. New best score: 0.291
[rank: 6] Metric val_loss improved by 0.001 >= min_delta = 0.0. New best score: 0.301
[rank: 5] Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.154


Validation: |                                                                                                 …

[rank: 3] Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 0.282


Validation: |                                                                                                 …

[rank: 4] Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.278
[rank: 3] Metric val_loss improved by 0.073 >= min_delta = 0.0. New best score: 0.209
[rank: 7] Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 0.325
[rank: 1] Metric val_loss improved by 0.049 >= min_delta = 0.0. New best score: 0.327


Validation: |                                                                                                 …

[rank: 4] Metric val_loss improved by 0.038 >= min_delta = 0.0. New best score: 0.240


Validation: |                                                                                                 …

[rank: 1] Metric val_loss improved by 0.018 >= min_delta = 0.0. New best score: 0.308


Validation: |                                                                                                 …

[rank: 6] Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 0.291
[rank: 2] Metric val_loss improved by 0.017 >= min_delta = 0.0. New best score: 0.298


Validation: |                                                                                                 …

Validation: |                                                                                                 …

[rank: 4] Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.233


Validation: |                                                                                                 …

[rank: 0] Metric val_loss improved by 0.000 >= min_delta = 0.0. New best score: 0.291


Validation: |                                                                                                 …

Validation: |                                                                                                 …

Validation: |                                                                                                 …

[rank: 6] Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 0.286


Validation: |                                                                                                 …

[rank: 6] Metric val_loss improved by 0.008 >= min_delta = 0.0. New best score: 0.277
[rank: 4] Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 0.226


Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=50` reached.
You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/8
You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` 

Detected Classes:Detected Classes:  Detected Classes:Detected Classes:['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap'] 
 
Detected Classes:Total number of classes:Detected Classes:['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']Total number of classes:['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']Detected Classes:   
  
['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']6['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']Total number of classes:6['Core deformation', 'Core displacement', 'Core split', 'Foreign Object Damage', 'Resin buildup', 'Splice gap']Total number of c

LOCAL_RANK: 5 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 4 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 7 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
LOCAL_RANK: 6 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
/opt/conda/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:215: Using `DistributedSampler` with the dataloaders. During `trainer.test()`, it is recommended to use `Trainer(devices=1, num_nodes=1)` to ensure each sample/batch gets evaluated exactly once. Otherwise, multi-device settings use `DistributedSampler` that replicates some samples to make sure all devices have same batch size in case of uneven inputs.


Testing: |                                                                                                    …


Classification Report:
                       precision    recall  f1-score   support

     Core deformation       0.81      0.94      0.87        18
    Core displacement       1.00      0.91      0.95        23
           Core split       0.67      1.00      0.80         2
Foreign Object Damage       1.00      0.80      0.89         5
        Resin buildup       1.00      0.67      0.80         3
           Splice gap       1.00      1.00      1.00         9

             accuracy                           0.92        60
            macro avg       0.91      0.89      0.89        60
         weighted avg       0.93      0.92      0.92        60



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                       precision    recall  f1-score   support

     Core deformation       0.78      1.00      0.88        14
    Core displacement       1.00      1.00      1.00        17
           Core split       1.00      1.00      1.00         4
Foreign Object Damage       0.82      0.82      0.82        11
        Resin buildup       0.00      0.00      0.00         4
           Splice gap       1.00      1.00      1.00        10

             accuracy                           0.90        60
            macro avg       0.77      0.80      0.78        60
         weighted avg       0.85      0.90      0.87        60



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                       precision    recall  f1-score   support

     Core deformation       0.95      1.00      0.97        19
    Core displacement       1.00      0.95      0.97        19
           Core split       0.80      0.80      0.80         5
Foreign Object Damage       1.00      1.00      1.00         6
        Resin buildup       0.00      0.00      0.00         1
           Splice gap       0.91      1.00      0.95        10

             accuracy                           0.95        60
            macro avg       0.78      0.79      0.78        60
         weighted avg       0.94      0.95      0.94        60


Classification Report:
                       precision    recall  f1-score   support

     Core deformation       0.79      0.95      0.86        20
    Core displacement       0.89      0.94      0.91        17
           Core split       1.00      0.17      0.29         6
Foreign Object Damage       0.75      0.86      0.80         7
        Resin buildup      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                       precision    recall  f1-score   support

     Core deformation       0.75      1.00      0.86        15
    Core displacement       1.00      1.00      1.00        15
           Core split       1.00      0.62      0.77         8
Foreign Object Damage       0.92      1.00      0.96        11
        Resin buildup       0.00      0.00      0.00         2
           Splice gap       0.88      0.78      0.82         9

             accuracy                           0.88        60
            macro avg       0.76      0.73      0.73        60
         weighted avg       0.87      0.88      0.87        60



/opt/conda/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test_precision', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/opt/conda/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test_recall', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.
/opt/conda/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:431: It is recommended to use `self.log('test_f1_score', ..., sync_dist=True)` when logging on epoch level in distributed setting to accumulate the metric across devices.


In [6]:
import os

def clean_dataset(directory, valid_extensions=('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp')):
    """
    Traverse through the directory and delete files that do not have valid image extensions.

    Args:
        directory (str): Path to the dataset directory.
        valid_extensions (tuple): Tuple of valid file extensions.
    """
    for root, dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            if not file.lower().endswith(valid_extensions):
                print(f"Deleting invalid file: {file_path}")
                os.remove(file_path)
        for dir in dirs:
            dir_path = os.path.join(root, dir)
            if dir == ".ipynb_checkpoints":  # Specific to Jupyter's checkpoint folders
                print(f"Deleting invalid directory: {dir_path}")
                os.rmdir(dir_path)

# Replace 'dataset_final' with your dataset directory path
dataset_dir = 'dataset_final'
clean_dataset(dataset_dir)


Deleting invalid directory: dataset_final/.ipynb_checkpoints


In [None]:
#!pip install dataset