In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# pip install wandb -qU

# import wandb
# wandb.login()

In [3]:
import os

import pandas as pd
import torch
from PIL import Image
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, ConcatDataset, random_split
from torchvision import transforms

class CrackDataset(Dataset):
    """Map-style dataset that loads crack images listed in a DataFrame.

    Args:
        dataframe: Table read positionally: column 0 holds the image file
            path and column 1 holds the label string, which must be a key
            of ``label_map`` (``'Positive'`` or ``'Negative'``).
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform
        # Define a mapping from string labels to integer labels
        self.label_map = {'Positive': 1, 'Negative': 0}

    def __len__(self):
        """Return the number of rows (samples) in the backing DataFrame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, label_tensor)`` where ``image`` is the (optionally
            transformed) RGB image and ``label_tensor`` is a scalar
            ``torch.long`` tensor holding 0 or 1.

        Raises:
            KeyError: If the row's label string is not in ``label_map``.
        """
        img_path = self.dataframe.iloc[idx, 0]
        label_str = self.dataframe.iloc[idx, 1]

        # Open inside a context manager so the file handle is released right
        # away, and force three channels so grayscale / RGBA / palette files
        # produce the same layout as ordinary RGB images.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        # Compare against None explicitly: a transform object may define its
        # own truthiness, and a "falsy" transform should still be applied.
        if self.transform is not None:
            image = self.transform(image)

        # Map string label to integer label
        label_int = self.label_map[label_str]

        # Convert label to tensor
        label_tensor = torch.tensor(label_int, dtype=torch.long)

        return image, label_tensor
#         label = 0
#         for i, c in enumerate(self.classes):
#             if idx < len(os.listdir(os.path.join(self.data_dir, c))):
#                 label = i
#                 break
                
#             else:
#                 idx -= len(os.listdir(os.path.join(self.data_dir, c)))
                
#             image_name = os.listdir(os.path.join(self.data_dir, self.classes[label]))[idx]
#             image = Image.open(os.path.join(self.data_dir, self.classes[label], image_name))
#             if self.transform:
#                 image = self.transform(image)
                
#             return image, label

In [4]:
# Install torchmetrics into the running kernel's environment. The explicit
# %pip magic does not depend on IPython automagic; -q keeps the log short.
%pip install -q torchmetrics

Note: you may need to restart the kernel to use updated packages.


In [5]:
import torch
from pytorch_lightning import LightningDataModule

class CrackDataModule(LightningDataModule):
    """Lightning data module that builds train/validation loaders from a
    directory containing one sub-folder of images per class.

    Args:
        data_dir: Directory whose immediate sub-directories are the class
            folders. Each folder name is used as the label string handed to
            ``CrackDataset`` (so it must be a key of that dataset's
            ``label_map``).
        batch_size: Batch size used by both loaders.
        num_workers: Forwarded to ``DataLoader``.
        pin_memory: Forwarded to ``DataLoader``.
        validation_split: Fraction of files held out for validation
            (forwarded to ``train_test_split`` as ``test_size``).
        shuffle_dataset: Whether the training loader shuffles each epoch.
        random_seed: ``random_state`` used for the train/validation split.
    """

    def __init__(
        self,
        data_dir,
        batch_size: int = 64,
        num_workers: int = 0,
        pin_memory: bool = False,
        validation_split: float = 0.2,
        shuffle_dataset: bool = True,
        random_seed=42,
    ) -> None:
        super().__init__()
        self.data_dir = data_dir
        self.num_workers = num_workers
        self.pin_memory = pin_memory
        self.validation_split = validation_split
        self.shuffle_dataset = shuffle_dataset
        self.random_seed = random_seed

        # The same pipeline is applied to training and validation images:
        # resize to 299x299, then convert to a tensor.
        self.transforms = transforms.Compose(
            [
                transforms.Resize((299, 299)),
                transforms.ToTensor(),
            ]
        )

        self.batch_size = batch_size

    def prepare_data(self):
        """No-op: nothing is downloaded or pre-processed here."""
        pass

    def setup(self, stage=None):
        """Scan ``data_dir``, split the files and build both datasets.

        Populates ``self.train_dataset`` and ``self.val_dataset``.

        Raises:
            ValueError: If no files are found under any class folder.
        """
        all_files = []
        labels = []

        # os.listdir returns entries in arbitrary order; sorting makes the
        # file list - and therefore the seeded split - reproducible.
        for class_name in sorted(os.listdir(self.data_dir)):
            class_dir = os.path.join(self.data_dir, class_name)
            if not os.path.isdir(class_dir):
                # Stray files next to the class folders are not classes.
                continue
            for file in sorted(os.listdir(class_dir)):
                all_files.append(os.path.join(class_dir, file))
                labels.append(class_name)

        if not all_files:
            raise ValueError(
                f"No files found in any class sub-directory of {self.data_dir!r}"
            )

        train_files, val_files, train_labels, val_labels = train_test_split(
            all_files, labels, test_size=self.validation_split, random_state=self.random_seed
        )

        train_df = pd.DataFrame({'file_path': train_files, 'label': train_labels})
        val_df = pd.DataFrame({'file_path': val_files, 'label': val_labels})

        self.train_dataset = CrackDataset(dataframe=train_df, transform=self.transforms)
        self.val_dataset = CrackDataset(dataframe=val_df, transform=self.transforms)

    def train_dataloader(self):
        """Return the loader over the training split."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory,
            shuffle=self.shuffle_dataset
        )

    def val_dataloader(self):
        """Return the loader over the validation split (never shuffled)."""
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            pin_memory=self.pin_memory,
            shuffle=False
        )

    def test_dataloader(self):
        """No test split is defined; returns ``None``."""
        return None

In [6]:
import torch
import torch.nn as nn
from torchmetrics import Accuracy

class InceptionCustom(nn.Module):
    """Pretrained Inception v3 with its final ``fc`` layer resized.

    The network is fetched through ``torch.hub`` from the torchvision
    v0.10.0 repository with pretrained weights, and its ``fc`` layer is
    replaced by a fresh ``nn.Linear`` producing ``num_classes`` outputs.
    """

    def __init__(self, num_classes) -> None:
        super().__init__()
        backbone = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)
        # Swap the stock classification head for one sized to our label set.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.model = backbone

    def forward(self, x):
        """Pass ``x`` through the wrapped network and return its output as-is."""
        out = self.model(x)
        return out

In [7]:
from pytorch_lightning import LightningModule
import torch.nn.functional as F
import torchmetrics

class Classifier(LightningModule):
    """LightningModule that trains a classification network with
    cross-entropy loss and tracks validation accuracy.

    Args:
        net: Network to train. Its output may be a plain logits tensor or an
            object exposing a ``.logits`` attribute.
        lr: Learning rate for the Adam optimizer.
        num_classes: Number of classes for the accuracy metric.
    """

    def __init__(
        self,
        net: torch.nn.Module,
        lr: float = 0.001,
        num_classes: int = 2,
    ) -> None:
        super().__init__()
        self.model = net
        self.accuracy = torchmetrics.classification.Accuracy(task="multiclass", num_classes=num_classes)
        self.lr = lr

    @staticmethod
    def _extract_logits(outputs):
        """Return the main logits tensor from a network output.

        Outputs that carry a ``.logits`` attribute are unwrapped; anything
        else is returned unchanged. This lets the training and validation
        steps share one code path whichever form the network returns.
        """
        return getattr(outputs, "logits", outputs)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the wrapped network on ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one training batch."""
        inputs, labels = batch
        logits = self._extract_logits(self.model(inputs))
        # F.cross_entropy expects raw, unnormalized logits; it applies
        # log-softmax internally, so no softmax is applied here.
        loss = F.cross_entropy(logits, labels)
        self.log("train_loss", loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log cross-entropy loss and accuracy for one validation batch."""
        inputs, labels = batch
        logits = self._extract_logits(self.model(inputs))
        # Raw logits go straight into cross_entropy (log-softmax is internal).
        loss = F.cross_entropy(logits, labels)
        preds = torch.argmax(logits, dim=1)
        acc = self.accuracy(preds, labels)
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)

    def configure_optimizers(self):
        """Return an Adam optimizer over the network's parameters."""
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        return optimizer

In [None]:
import torch
import pytorch_lightning as pl
from sklearn.model_selection import train_test_split

# Run configuration, gathered in one place so it is easy to tune
DATA_DIR = "/kaggle/input/surface-crack-detection"
BATCH_SIZE = 64
LEARNING_RATE = 0.001
MAX_EPOCHS = 10
SEED = 42

# Seed the Python, NumPy and PyTorch RNGs so that weight initialization and
# batch shuffling are repeatable between runs
pl.seed_everything(SEED, workers=True)

# Initialize the CrackDataModule
data_module = CrackDataModule(data_dir=DATA_DIR, batch_size=BATCH_SIZE)

# Initialize the InceptionV3 model
num_classes = 2
inception_model = InceptionCustom(num_classes=num_classes)

# Initialize the Classifier
classifier = Classifier(net=inception_model, lr=LEARNING_RATE)

# Initialize the PyTorch Lightning Trainer
trainer = pl.Trainer(max_epochs=MAX_EPOCHS)

# Train the model
trainer.fit(classifier, data_module)


Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:00<00:00, 151MB/s]  
2024-03-18 05:49:43.743016: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-18 05:49:43.743142: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-18 05:49:43.865209: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.
/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]