In [9]:
# Show the directory the kernel started in (the notebook lives under research/).
%pwd

'/home/grkmkola/Desktop/Projects/mlops-proje/kidney-disease-classification/research'

In [10]:
import os
from pathlib import Path

# Work from the project root so the relative paths used further down
# (config/config.yaml, artifacts/...) resolve correctly.
# The guard makes this cell idempotent: a bare `%cd ..` climbs one directory
# higher every time the cell is re-run, silently breaking those paths.
if Path.cwd().name == "research":
    os.chdir(Path.cwd().parent)

# Last expression of the cell: display the resulting working directory.
os.getcwd()

/home/grkmkola/Desktop/Projects/mlops-proje/kidney-disease-classification


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


'/home/grkmkola/Desktop/Projects/mlops-proje/kidney-disease-classification'

In [11]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of paths and hyperparameters for the training stage.

    The dataclass is frozen, so instances cannot be modified after creation.
    Every field is required (none has a default) and must be supplied when
    the object is constructed.
    """
    # Root directory of the training stage.
    root_dir: Path
    # File the training loop writes the model to when validation loss improves.
    last_model_path: Path
    # Presumably the dedicated file for the best checkpoint -- TODO confirm against config.yaml.
    best_model_path: Path
    # Pickled base model that training starts from.
    updated_base_model_path: Path
    # Directory reserved for TensorBoard logs.
    tensorboard_log_dir: Path
    # Root of the image-folder dataset (one sub-directory per class).
    training_data: Path
    # Number of training epochs.
    params_epochs: int
    # Mini-batch size used by all data loaders.
    params_batch_size: int
    # Whether random augmentation is applied to the training images.
    params_augmentation: bool
    # Image size; the training code uses all but the last entry as the resize target
    # (presumably [height, width, channels] -- TODO confirm against params.yaml).
    params_image_size: list
    # Epochs without validation-loss improvement before training stops early.
    params_early_stopping_patience: int
    # Learning rate for the optimizer (also used as the scheduler's max_lr).
    params_learning_rate: float
    # Seed for the reproducible train/valid/test split.
    params_random_state: int

In [12]:
import os
from cnnClassifier.constants import *
from cnnClassifier.utils import read_yaml, create_directories, logger

In [13]:
class ConfigurationManager:
    """Reads the YAML config/params files and builds typed stage configurations."""

    def __init__(
            self,
            config_filepath = CONFIG_FILE_PATH,
            params_filepath = PARAMS_FILE_PATH,
        ) -> None:
        """Load both YAML files and create the directories the training stage needs.

        Args:
            config_filepath: Path of the YAML file holding artifact/stage paths.
            params_filepath: Path of the YAML file holding hyperparameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories(
            [
                self.config.artifacts_root,
                self.config.training.root_dir,
                self.config.training.tensorboard_log_dir,
            ]
        )

    def get_training_config(self) -> "TrainingConfig":
        """Assemble the ``TrainingConfig`` for the training stage.

        Returns:
            A fully populated ``TrainingConfig`` whose path-like values are
            converted to ``pathlib.Path``.
        """
        config = self.config.training
        prepare_base_model = self.config.prepare_base_model
        data_ingestion = self.config.data_ingestion
        params = self.params

        # The dataset is extracted into a fixed sub-folder of the unzip directory.
        training_data = Path(data_ingestion.unzip_dir) / "CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone"

        last_model_path = Path(config.last_model_path)
        # ``best_model_path`` is a required TrainingConfig field; leaving it out
        # raises ``TypeError`` at construction time. It is derived here as the
        # location the training loop writes the best checkpoint to
        # (``best_model.pth`` next to the last-model file).
        best_model_path = last_model_path.parent / "best_model.pth"

        training_config = TrainingConfig(
            root_dir=Path(config.root_dir),
            last_model_path=last_model_path,
            best_model_path=best_model_path,
            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
            tensorboard_log_dir=Path(config.tensorboard_log_dir),
            training_data=training_data,
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_augmentation=params.AUGMENTATION,
            params_image_size=params.IMAGE_SIZE,
            params_early_stopping_patience=params.EARLY_STOPPING_PATIENCE,
            params_learning_rate=params.LEARNING_RATE,
            params_random_state=params.RANDOM_STATE
        )

        return training_config

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
from tqdm import tqdm
from pathlib import Path
from sklearn.metrics import precision_score, recall_score, f1_score

In [15]:
class Training:
    """Fine-tunes the prepared base model on the image-folder dataset.

    Intended call order: ``get_base_model()`` -> ``train_valid_test_loader()``
    -> ``train()``.
    """

    def __init__(self, config):
        """Store the training configuration and select CUDA when available, else CPU."""
        self.config = config
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def get_base_model(self):
        """Load the pickled base model from ``updated_base_model_path`` onto ``self.device``."""
        # SECURITY NOTE: torch.load unpickles the file -- only load checkpoints
        # from a trusted source.
        # NOTE(review): recent PyTorch releases default ``weights_only`` to True,
        # which rejects full-model pickles -- confirm against the installed version.
        # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
        self.model = torch.load(self.config.updated_base_model_path, map_location=self.device)
        self.model.to(self.device)

    @staticmethod
    def _split_indices(total_size, seed):
        """Return reproducible ``(train, valid, test)`` index lists (70% / 15% / remainder)."""
        train_size = int(0.7 * total_size)  # 70% for training
        valid_size = int(0.15 * total_size)  # 15% for validation
        test_size = total_size - train_size - valid_size  # remainder for testing

        # Seeded generator so the split is identical on every run.
        generator = torch.Generator().manual_seed(seed)
        parts = random_split(range(total_size), [train_size, valid_size, test_size], generator=generator)
        return tuple(list(part.indices) for part in parts)

    def _best_model_path(self):
        """Return the configured best-checkpoint path, else ``best_model.pth`` beside the last model."""
        configured = getattr(self.config, "best_model_path", None)
        if configured is not None:
            return Path(configured)
        return self.config.last_model_path.parent / "best_model.pth"

    def train_valid_test_loader(self):
        """Build ``self.train_loader``, ``self.valid_loader`` and ``self.test_loader``.

        The image folder is split 70/15/15 with a seeded generator. Only the
        training subset receives augmentation (when enabled); validation and
        test always use the deterministic resize + normalize pipeline.
        """
        eval_transform = transforms.Compose([
            transforms.Resize(self.config.params_image_size[:-1]),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        if self.config.params_augmentation:
            train_transform = transforms.Compose([
                transforms.RandomRotation(40),
                transforms.RandomHorizontalFlip(),
                transforms.RandomResizedCrop(self.config.params_image_size[0], scale=(0.8, 1.0)),
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
                eval_transform
            ])
        else:
            train_transform = eval_transform

        # Two views over the same folder, one per transform. Subsets produced by
        # a single random_split all point at the SAME underlying dataset object,
        # so assigning ``subset.dataset.transform`` per split overwrites the
        # previous assignment and the last one wins for every split -- which
        # silently disabled training augmentation.
        train_view = datasets.ImageFolder(self.config.training_data, transform=train_transform)
        eval_view = datasets.ImageFolder(self.config.training_data, transform=eval_transform)

        train_idx, valid_idx, test_idx = self._split_indices(
            len(train_view), self.config.params_random_state
        )

        train_dataset = torch.utils.data.Subset(train_view, train_idx)
        valid_dataset = torch.utils.data.Subset(eval_view, valid_idx)
        test_dataset = torch.utils.data.Subset(eval_view, test_idx)

        self.train_loader = DataLoader(train_dataset, batch_size=self.config.params_batch_size, shuffle=True, num_workers=4)
        self.valid_loader = DataLoader(valid_dataset, batch_size=self.config.params_batch_size, shuffle=False, num_workers=4)
        self.test_loader = DataLoader(test_dataset, batch_size=self.config.params_batch_size, shuffle=False, num_workers=4)

        logger.info(f"Number of training samples: {len(train_dataset)}")
        logger.info(f"Number of validation samples: {len(valid_dataset)}")
        logger.info(f"Number of test samples: {len(test_dataset)}")

    @staticmethod
    def save_model(path: Path, model: nn.Module):
        """Serialize the whole model object (not just its state dict) to ``path``."""
        torch.save(model, path)

    def train(self):
        """Run the training loop with per-epoch validation and early stopping.

        Each epoch trains on ``self.train_loader``, evaluates on
        ``self.valid_loader`` and logs loss, accuracy and weighted
        precision/recall/F1. When validation loss improves, the model is saved
        to ``last_model_path`` and to the best-model path; training stops once
        the loss has failed to improve for ``params_early_stopping_patience``
        consecutive epochs.
        """
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=self.config.params_learning_rate)
        # OneCycleLR is stepped once per batch, hence steps_per_epoch * epochs total steps.
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=self.config.params_learning_rate, steps_per_epoch=len(self.train_loader), epochs=self.config.params_epochs)
        best_model_path = self._best_model_path()
        best_valid_loss = float('inf')
        early_stopping_counter = 0

        for epoch in range(self.config.params_epochs):
            # ---- training pass ----
            self.model.train()
            train_loss = 0.0
            train_pbar = tqdm(self.train_loader, desc=f'Epoch {epoch+1}/{self.config.params_epochs} [Train]')
            for inputs, labels in train_pbar:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                scheduler.step()

                # Weight by batch size so the epoch value is a per-sample mean.
                train_loss += loss.item() * inputs.size(0)
                train_pbar.set_postfix({'loss': f'{loss.item():.4f}'})

            train_loss = train_loss / len(self.train_loader.dataset)

            # ---- validation pass ----
            self.model.eval()
            valid_loss = 0.0
            correct = 0
            total = 0
            all_labels = []
            all_predictions = []
            valid_pbar = tqdm(self.valid_loader, desc=f'Epoch {epoch+1}/{self.config.params_epochs} [Valid]')
            with torch.no_grad():
                for inputs, labels in valid_pbar:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)
                    outputs = self.model(inputs)
                    loss = criterion(outputs, labels)
                    valid_loss += loss.item() * inputs.size(0)
                    # Predicted class = index of the largest logit.
                    predicted = outputs.argmax(dim=1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

                    all_labels.extend(labels.cpu().numpy())
                    all_predictions.extend(predicted.cpu().numpy())

                    valid_pbar.set_postfix({'loss': f'{loss.item():.4f}'})

            valid_loss = valid_loss / len(self.valid_loader.dataset)
            accuracy = 100 * correct / total

            # Calculate precision, recall, and F1 score
            precision = precision_score(all_labels, all_predictions, average='weighted')
            recall = recall_score(all_labels, all_predictions, average='weighted')
            f1 = f1_score(all_labels, all_predictions, average='weighted')

            logger.info(f'Epoch {epoch+1}/{self.config.params_epochs}, '
                         f'Train Loss: {train_loss:.4f}, '
                         f'Valid Loss: {valid_loss:.4f}, '
                         f'Valid Accuracy: {accuracy:.2f}%, '
                         f'Precision: {precision:.4f}, '
                         f'Recall: {recall:.4f}, '
                         f'F1 Score: {f1:.4f}')

            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                early_stopping_counter = 0
                # NOTE(review): ``last_model_path`` is only written on improvement,
                # so it always holds the same weights as the best checkpoint --
                # confirm that is what downstream stages expect.
                self.save_model(path=self.config.last_model_path, model=self.model)
                logger.info(f'Saved best model with valid loss: {valid_loss:.4f}')

                # Save best model separately
                self.save_model(path=best_model_path, model=self.model)
                logger.info(f'Saved best model separately at: {best_model_path}')
            else:
                early_stopping_counter += 1
                if early_stopping_counter >= self.config.params_early_stopping_patience:
                    logger.info('Early stopping triggered')
                    break

        logger.info(f'Training completed. Best validation loss: {best_valid_loss:.4f}')
        logger.info(f'Best model saved at: {best_model_path}')


In [16]:
# Build the typed training configuration from the YAML config/params files.
config_manager = ConfigurationManager()
config = config_manager.get_training_config()

# Run the stage end to end: load the base model, build the data loaders, train.
training = Training(config=config)
training.get_base_model()
training.train_valid_test_loader()
training.train()

[2024-07-23 16:57:11,098: INFO: utils: yaml file config/config.yaml loaded successfully:]
[2024-07-23 16:57:11,105: INFO: utils: yaml file params.yaml loaded successfully:]
[2024-07-23 16:57:11,107: INFO: utils: created directory at: artifacts:]
[2024-07-23 16:57:11,110: INFO: utils: created directory at: artifacts/training:]
[2024-07-23 16:57:11,113: INFO: utils: created directory at: artifacts/training/tensorboard_logs:]
[2024-07-23 16:57:12,672: INFO: 4114949571: Number of training samples: 8712:]
[2024-07-23 16:57:12,697: INFO: 4114949571: Number of validation samples: 1866:]
[2024-07-23 16:57:12,699: INFO: 4114949571: Number of test samples: 1868:]


Epoch 1/1 [Train]: 100%|██████████| 545/545 [01:00<00:00,  8.95it/s, loss=0.4473]
Epoch 1/1 [Valid]: 100%|██████████| 117/117 [00:12<00:00,  9.23it/s, loss=0.1166]

[2024-07-23 16:58:26,356: INFO: 4114949571: Epoch 1/1, Train Loss: 0.7426, Valid Loss: 0.2509, Valid Accuracy: 91.69%, Precision: 0.9167, Recall: 0.9169, F1 Score: 0.9151:]





[2024-07-23 16:58:26,684: INFO: 4114949571: Saved best model with valid loss: 0.2509:]
[2024-07-23 16:58:27,060: INFO: 4114949571: Saved best model separately at: artifacts/training/best_model.pth:]
[2024-07-23 16:58:27,064: INFO: 4114949571: Training completed. Best validation loss: 0.2509:]
[2024-07-23 16:58:27,065: INFO: 4114949571: Best model saved at: artifacts/training/best_model.pth:]
