In [1]:
import os

In [2]:
%pwd

'c:\\Users\\dhira\\Desktop\\MLflow\\MLflow-DVC\\research'

In [3]:
# Move from the notebook's folder up to its parent so the relative paths used by
# later cells resolve (presumably the project root -- confirm for your checkout).
# NOTE(review): this is not idempotent -- every re-run of this cell climbs one
# more directory level, so run it exactly once per kernel session.
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\dhira\\Desktop\\MLflow\\MLflow-DVC'

In [5]:
#Training entity
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of paths and hyperparameters consumed by the training stage.

    Instances are built by ``ConfigurationManager.get_training_config`` from the
    loaded config and params files and are read by the ``Training`` classes.
    """

    # Directory for training artifacts; created by get_training_config.
    root_dir: Path
    # Destination file for the trained model written at the end of train().
    trained_model_path: Path
    # Saved base model that get_base_model() loads before training.
    updated_base_model_path: Path
    # Root image directory handed to the dataset / generator loaders.
    training_data: Path
    # Number of passes over the training data.
    params_epochs: int
    # Batch size used for both the training and validation loaders.
    params_batch_size: int
    # When true, the training pipeline applies random augmentations.
    params_is_augmentation: bool
    # Image size; sliced as [:2] / [:-1] by the loaders, so presumably
    # (height, width, channels) -- TODO confirm against params.yaml.
    params_image_size: list
    # Number of output units of the classifier head.
    params_classes: int

In [6]:
from cnnClassifier.constants import *
from cnnClassifier.utils.common import read_yaml, create_directories

In [10]:
# 5. Update the configuration manager in src config

class ConfigurationManager:
    """Load the project config/params files and hand out typed stage configs."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        """Read both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: Location of the main config file.
            params_filepath: Location of the hyperparameter file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` for the training stage.

        Creates the training root directory as a side effect.

        Returns:
            A ``TrainingConfig`` populated from the ``training``,
            ``prepare_base_model`` and ``data_ingestion`` config sections and
            from the hyperparameter file.
        """
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyperparams = self.params

        # The images live in a fixed sub-folder of the unzipped dataset.
        dataset_dir = os.path.join(
            self.config.data_ingestion.unzip_dir, "kidney-ct-scan-image"
        )

        training_root = Path(training_section.root_dir)
        create_directories([training_root])

        return TrainingConfig(
            root_dir=training_root,
            trained_model_path=Path(training_section.trained_model_path),
            updated_base_model_path=Path(base_model_section.updated_base_model_path),
            training_data=Path(dataset_dir),
            params_epochs=hyperparams.EPOCHS,
            params_batch_size=hyperparams.BATCH_SIZE,
            params_is_augmentation=hyperparams.AUGMENTATION,
            params_image_size=hyperparams.IMAGE_SIZE,
            params_classes=hyperparams.CLASSES,
        )


In [22]:
# TensorFlow implementation -- do not use; the PyTorch version below supersedes it.

import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time

In [9]:
class Training:
    """Keras/TensorFlow training stage driven by a ``TrainingConfig``.

    Note: a later cell in this notebook defines another class named
    ``Training`` (PyTorch); once that cell runs, the name refers to it instead.
    """

    def __init__(self, config: TrainingConfig):
        """Keep a reference to the training configuration."""
        self.config = config

    def get_base_model(self):
        """Load the saved Keras model from the configured path into ``self.model``."""
        base_model_path = self.config.updated_base_model_path
        self.model = tf.keras.models.load_model(base_model_path)

    def train_valid_generator(self):
        """Create ``self.valid_generator`` and ``self.train_generator``.

        Both read from ``config.training_data`` with pixel values rescaled by
        1/255 and a 20% validation split. Augmentation is applied to the
        training generator only when ``config.params_is_augmentation`` is set.
        """
        cfg = self.config
        make_image_generator = tf.keras.preprocessing.image.ImageDataGenerator

        # Options shared by every ImageDataGenerator built here.
        shared_generator_options = {
            "rescale": 1./255,
            "validation_split": 0.20,
        }

        # Options shared by both flow_from_directory calls.
        flow_options = {
            "target_size": cfg.params_image_size[:-1],
            "batch_size": cfg.params_batch_size,
            "interpolation": "bilinear",
        }

        validation_source = make_image_generator(**shared_generator_options)
        self.valid_generator = validation_source.flow_from_directory(
            directory=cfg.training_data,
            subset="validation",
            shuffle=False,
            **flow_options
        )

        # Without augmentation the training flow reuses the validation generator.
        training_source = validation_source
        if cfg.params_is_augmentation:
            training_source = make_image_generator(
                rotation_range=40,
                horizontal_flip=True,
                width_shift_range=0.2,
                height_shift_range=0.2,
                shear_range=0.2,
                zoom_range=0.2,
                **shared_generator_options
            )

        self.train_generator = training_source.flow_from_directory(
            directory=cfg.training_data,
            subset="training",
            shuffle=True,
            **flow_options
        )

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path`` via ``model.save``."""
        model.save(path)

    def train(self):
        """Fit ``self.model`` on the generators, then save it to the configured path.

        Step counts are the floor of samples / batch size for each generator and
        are stored on the instance as ``steps_per_epoch`` / ``validation_steps``.
        """
        training_flow = self.train_generator
        self.steps_per_epoch = training_flow.samples // training_flow.batch_size

        validation_flow = self.valid_generator
        self.validation_steps = validation_flow.samples // validation_flow.batch_size

        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )


In [13]:
# PyTorch implementation -- this is the version to use.

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision import models
# from prepare_base_model_pytorch import PrepareBaseModel, PrepareBaseModelConfig
import os

class Training:
    """Fine-tune the prepared VGG16 model with PyTorch and save its weights.

    Intended call order: ``get_base_model()``, ``train_valid_generator()``,
    then ``train()``.
    """

    def __init__(self, config: "TrainingConfig", device=None):
        """Store the configuration and pick the compute device.

        Args:
            config: Paths and hyperparameters for this stage.
            device: Device to train on; defaults to ``"cuda"`` when available,
                otherwise ``"cpu"``.
        """
        self.config = config
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.model = None

    def get_base_model(self):
        """Rebuild the VGG16 architecture and load the saved base-model weights."""
        # Same architecture as the checkpoint, without downloading weights.
        model = models.vgg16(weights=None)

        # The head layout (Flatten at index 0, Linear at index 1) fixes the
        # state-dict key names, so it has to match how the checkpoint was saved.
        in_features = 512 * 7 * 7
        model.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, self.config.params_classes),
        )

        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # produced by this pipeline (consider weights_only=True where supported).
        state_dict = torch.load(
            self.config.updated_base_model_path, map_location=self.device
        )
        model.load_state_dict(state_dict)

        model.to(self.device)
        self.model = model

    def train_valid_generator(self, train_fraction: float = 0.8, seed=42):
        """Create ``self.train_loader`` and ``self.valid_loader``.

        The image folder is opened twice, once per transform pipeline, and one
        shuffled index permutation is shared between the two views. The
        validation subset therefore uses the plain resize transform even when
        augmentation is enabled for training, and the subsets never overlap.

        Args:
            train_fraction: Share of the samples used for training; the rest
                is used for validation. Must be strictly between 0 and 1.
            seed: Seed for the split so it is reproducible across runs. Pass
                ``None`` for a different split on every call.

        Raises:
            ValueError: If ``train_fraction`` is not strictly between 0 and 1.
        """
        if not 0.0 < train_fraction < 1.0:
            raise ValueError(
                f"train_fraction must be between 0 and 1 (exclusive), got {train_fraction}"
            )

        # Use the same (height, width) for crops and resizes so augmented and
        # non-augmented batches have identical shapes, square or not.
        image_hw = tuple(self.config.params_image_size[:2])

        valid_transform = transforms.Compose([
            transforms.Resize(image_hw),
            transforms.ToTensor(),
        ])

        if self.config.params_is_augmentation:
            train_transform = transforms.Compose([
                transforms.RandomRotation(40),
                transforms.RandomHorizontalFlip(),
                transforms.RandomResizedCrop(image_hw),
                transforms.ColorJitter(),
                transforms.ToTensor(),
            ])
        else:
            train_transform = valid_transform

        train_dataset = datasets.ImageFolder(
            root=self.config.training_data,
            transform=train_transform,
        )
        valid_dataset = datasets.ImageFolder(
            root=self.config.training_data,
            transform=valid_transform,
        )

        num_samples = len(train_dataset)
        train_size = int(train_fraction * num_samples)

        generator = torch.Generator()
        if seed is None:
            generator.seed()  # draw a fresh non-deterministic seed
        else:
            generator.manual_seed(seed)
        shuffled = torch.randperm(num_samples, generator=generator).tolist()

        # Both folders list the same files, so one permutation indexes both.
        train_subset = torch.utils.data.Subset(train_dataset, shuffled[:train_size])
        valid_subset = torch.utils.data.Subset(valid_dataset, shuffled[train_size:])

        batch_size = self.config.params_batch_size
        self.train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
        self.valid_loader = DataLoader(valid_subset, batch_size=batch_size, shuffle=False)

    @staticmethod
    def save_model(path: Path, model: nn.Module):
        """Write ``model``'s state dict to ``path``, creating parent folders.

        Args:
            path: Destination file; a ``str`` is accepted as well as a ``Path``.
            model: Module whose ``state_dict()`` is saved.
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        torch.save(model.state_dict(), path)

    def _run_epoch(self, loader, criterion, optimizer=None):
        """Run one pass over ``loader`` and return ``(mean_loss, accuracy)``.

        With an optimizer the model is put in training mode and updated after
        every batch; without one the pass runs in eval mode with gradients off.
        Both values are ``nan`` when the loader yields no samples.
        """
        is_training = optimizer is not None
        self.model.train(is_training)

        loss_sum = 0.0
        correct = 0
        seen = 0

        with torch.set_grad_enabled(is_training):
            for images, labels in loader:
                images, labels = images.to(self.device), labels.to(self.device)

                if is_training:
                    optimizer.zero_grad()

                outputs = self.model(images)
                loss = criterion(outputs, labels)

                if is_training:
                    loss.backward()
                    optimizer.step()

                batch_count = labels.size(0)
                # criterion returns the batch mean, so weight it by batch size.
                loss_sum += loss.item() * batch_count
                correct += (outputs.argmax(dim=1) == labels).sum().item()
                seen += batch_count

        if seen == 0:
            return float("nan"), float("nan")
        return loss_sum / seen, correct / seen

    def train(self, learning_rate=0.001):
        """Train for ``config.params_epochs`` epochs, then save the weights.

        Uses cross-entropy loss and plain SGD over the parameters that still
        require gradients, and prints loss and accuracy for the training and
        validation loaders after every epoch.

        Args:
            learning_rate: Learning rate passed to the SGD optimizer.
        """
        self.model.to(self.device)
        criterion = nn.CrossEntropyLoss()
        trainable_params = (p for p in self.model.parameters() if p.requires_grad)
        optimizer = optim.SGD(trainable_params, lr=learning_rate)

        for epoch in range(self.config.params_epochs):
            train_loss, train_acc = self._run_epoch(self.train_loader, criterion, optimizer)
            val_loss, val_acc = self._run_epoch(self.valid_loader, criterion)

            print(f"Epoch [{epoch+1}/{self.config.params_epochs}] "
                  f"Train Loss: {train_loss:.4f} Train Acc: {train_acc:.4f} "
                  f"Val Loss: {val_loss:.4f} Val Acc: {val_acc:.4f}")

        self.save_model(self.config.trained_model_path, self.model)



In [14]:
# Run the training stage end to end: configuration -> base model -> data loaders -> training.
# No try/except here: any failure should surface with its original traceback.
config = ConfigurationManager()
training_config = config.get_training_config()
training = Training(config=training_config)
training.get_base_model()
training.train_valid_generator()
training.train()

[2025-09-08 22:38:41,273: INFO: common: yaml_file: config\config.yaml loaded successfully]
[2025-09-08 22:38:41,277: INFO: common: yaml_file: params.yaml loaded successfully]
[2025-09-08 22:38:41,280: INFO: common: created directory at: artifacts]
[2025-09-08 22:38:41,281: INFO: common: created directory at: artifacts\training]
Epoch [1/1] Train Loss: 0.5313 Train Acc: 0.7473 Val Loss: 0.3793 Val Acc: 0.8817
