In [1]:
import os
%pwd

'd:\\Kidney-Disease-Classification\\notebooks'

In [2]:
# Step up one level from the notebooks/ folder so the relative paths used below
# (config, artifacts) are resolved from the parent directory.
# NOTE: not idempotent - re-running this cell moves up one more directory each time.
os.chdir('../')

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings for the model-training stage."""
    # Location of the image dataset; ModelTrainer passes it to ImageFolderCustom as targ_dir.
    data: Path
    # Directory for trained-model artifacts; ConfigurationManager creates it.
    model: Path

In [4]:
from KidneyDisease.constants import *
from KidneyDisease.utils.common import read_yaml, create_directories

In [5]:
class ConfigurationManager:
    """Load the project's YAML settings and build per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self)-> ModelTrainerConfig:
        """Build the training-stage config from the ``model_trainer`` section.

        The model output directory is created as a side effect.

        Returns:
            ModelTrainerConfig whose ``data`` and ``model`` fields are ``Path``
            objects, matching the types declared on the dataclass.
        """
        section = self.config.model_trainer

        create_directories([section.model])

        # Coerce the raw YAML values so the dataclass really holds Path
        # instances, as its annotations promise.
        return ModelTrainerConfig(
            data=Path(section.data),
            model=Path(section.model),
        )

In [6]:
import os
import torch, torchvision
from pathlib import Path
import random
from PIL import Image
from torch import nn
from torchvision import transforms
from KidneyDisease.utils.helper_functions import *
# Device-agnostic setup: run on the GPU when CUDA is usable, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

  from .autonotebook import tqdm as notebook_tqdm


'cpu'

In [7]:
class ModelTrainer:
    """Fine-tune the classifier head of a pretrained EfficientNet-B2 and save the model."""

    # Fixed seed so the train/test split is identical on every run.
    SPLIT_SEED = 42
    # File name of the saved model inside the configured model directory.
    MODEL_FILENAME = 'model_1.pth'

    def __init__(self, config:ModelTrainerConfig):
        """Store the training-stage configuration.

        Args:
            config: Provides ``data`` (dataset location) and ``model``
                (directory the trained model is written to).
        """
        self.config = config

    def _build_dataloaders(self, batch_size=64):
        """Create the train/test DataLoaders from a seeded 70/30 split of the dataset."""
        from torch.utils.data import DataLoader

        # Resize every image to 224x224 and convert it to a tensor.
        # NOTE(review): the pretrained weights' own preprocessing
        # (`EfficientNet_B2_Weights.DEFAULT.transforms()`) is not applied here -
        # confirm this matches the preprocessing used at inference time.
        data_transform = transforms.Compose([
            transforms.Resize(size=(224,224)),
            transforms.ToTensor()
        ])

        # Custom dataset defined in the project's helper functions.
        data = ImageFolderCustom(targ_dir=self.config.data,
                                 transform=data_transform)

        train_size = int(0.70 * len(data))
        test_size = len(data) - train_size

        # Seeded generator makes the split reproducible across runs.
        split_generator = torch.Generator().manual_seed(self.SPLIT_SEED)
        train_dataset, test_dataset = torch.utils.data.random_split(
            data, [train_size, test_size], generator=split_generator)

        print(f"Creating DataLoader's with batch size {batch_size}")

        train_dataloader = DataLoader(dataset=train_dataset,
                                      batch_size=batch_size,
                                      num_workers=0,
                                      shuffle=True)
        # Evaluation data does not need shuffling.
        test_dataloader = DataLoader(dataset=test_dataset,
                                     batch_size=batch_size,
                                     num_workers=0,
                                     shuffle=False)
        return train_dataloader, test_dataloader

    def _build_model(self):
        """Return a pretrained EfficientNet-B2 with frozen base layers and a new 4-class head."""
        effnetb2_weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT
        effnetb2 = torchvision.models.efficientnet_b2(weights=effnetb2_weights)

        # Freeze every pretrained parameter; only the replacement head below trains.
        for param in effnetb2.parameters():
            param.requires_grad = False

        # New classifier head: same dropout and input width, 4 output classes.
        effnetb2.classifier = nn.Sequential(
            nn.Dropout(p=0.3, inplace=True),
            nn.Linear(in_features=1408,
                      out_features=4))
        return effnetb2

    def trainer(self):
        """Train the model for one epoch and save it under ``config.model``.

        Returns:
            The value returned by the project's ``train`` helper.
        """
        from timeit import default_timer as timer

        train_dataloader_custom, test_dataloader_custom = self._build_dataloaders()
        effnetb2 = self._build_model()

        loss_fn = torch.nn.CrossEntropyLoss()
        # Hand the optimizer only the trainable (unfrozen) parameters.
        optimizer = torch.optim.Adam(
            params=[p for p in effnetb2.parameters() if p.requires_grad],
            lr=1e-3)

        start_time = timer()

        results = train(model=effnetb2.to(device),
                        train_dataloader=train_dataloader_custom,
                        test_dataloader=test_dataloader_custom,
                        optimizer=optimizer,
                        loss_fn=loss_fn,
                        epochs=1)

        end_time = timer()
        print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")

        # Save into the configured model directory rather than a hard-coded path.
        save_path = Path(self.config.model) / self.MODEL_FILENAME
        save_path.parent.mkdir(parents=True, exist_ok=True)
        # The whole module object is saved (not only its state_dict).
        torch.save(effnetb2, save_path)

        return results
    

In [8]:
# Run the training stage end to end: load configuration, build the trainer, train.
# No try/except wrapper: catching an exception only to re-raise it adds nothing,
# so errors propagate with their original traceback.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
# Bind the return value (if any) so it is not echoed as cell output.
training_results = model_trainer.trainer()

[2023-10-13 18:47:14,761: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-10-13 18:47:14,762: INFO: common: yaml file: params.yaml loaded successfully]
[2023-10-13 18:47:14,762: INFO: common: created directory at: artifacts]
[2023-10-13 18:47:14,762: INFO: common: created directory at: artifacts/models]
Creating DataLoader's with batch size 64


100%|██████████| 1/1 [09:34<00:00, 574.75s/it]

Epoch: 1 | train_loss: 0.7435 | train_acc: 0.7394 | test_loss: 0.4740 | test_acc: 0.8647
[INFO] Total training time: 574.761 seconds



