# Brainn Tumor Classification

# Import Required Packages

In [1]:
import os
os.chdir("../")
%pwd

'd:\\python-projects\\brain-tumor-classification'

In [2]:
from pathlib import Path
from dataclasses import dataclass
from src.utils.common import read_yaml, create_directories, save_json
from src.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from src.logging import logger

import torch
import torch.nn as nn
from torchvision import transforms, datasets
import torchvision.models as models
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm

import time
from torch.utils.tensorboard import SummaryWriter


%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

# Data Ingestion

In [3]:
@dataclass(frozen=True)
class DataIngestionConfig:
    root_dir: Path
    source_data_dir: Path

The *DataIngestionConfig Class* store two important paths for data processing, and the "frozen" part ensures these paths can't be accidentally modified later.

In [4]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([Path(self.config.artifacts_root)])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        config = self.config.data_ingestion

        data_ingestion_config = DataIngestionConfig(
            root_dir=Path(config.root_dir),  
            source_data_dir=Path(config.source_data_dir)
        )

        return data_ingestion_config

*ConfigurationManager*  is a class that helps to manage and organize your project's settings.

In [5]:
class DataIngestion:
    def __init__(self, config: DataIngestionConfig):
        self.config = config

    def initiate_data_ingestion(self):
        logger.info("Checking Data Directory is exsisting")
        if not self.config.source_data_dir.exists():
            raise FileNotFoundError(f"Data directory not found at {self.config.source_data_dir}")
        logger.info("Data directory found and ready to use")
        return self.config.source_data_dir

*DataIngestion* class is responsible for handling the data ingestion process, meaning it helps to check if the data directory exists and can be used for further processing.

In [6]:
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(config=data_ingestion_config)
    data_ingestion.initiate_data_ingestion()
except Exception as e:
    raise e

[2025-05-26 13:22:44,210] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: config\config.yaml
[2025-05-26 13:22:44,213] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: params.yaml
[2025-05-26 13:22:44,214] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts
[2025-05-26 13:22:44,215] [6] [tumorClassifierLogger] - INFO - Checking Data Directory is exsisting
[2025-05-26 13:22:44,216] [9] [tumorClassifierLogger] - INFO - Data directory found and ready to use


# Prepare Base Model

In [7]:
@dataclass(frozen=True)
class PrepareBaseModelConfig:
    root_dir: Path
    base_model_path: Path
    updated_base_model_path: Path
    params_image_size: list
    params_learning_rate: float
    params_include_top: bool
    params_weights: str
    params_classes: int

In [8]:
class ConfigurationManager:
    def __init__(
        self, 
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([Path(self.config.artifacts_root)])


    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        config = self.config.prepare_base_model
        
        create_directories([Path(config.root_dir)])
        

        prepare_base_model_config = PrepareBaseModelConfig(
            root_dir=Path(config.root_dir),
            base_model_path=Path(config.base_model_path),
            updated_base_model_path=Path(config.updated_base_model_path),
            params_image_size=self.params.IMAGE_SIZE,
            params_learning_rate=self.params.LEARNING_RATE,
            params_include_top=self.params.INCLUDE_TOP,
            params_weights=self.params.WEIGHTS,
            params_classes=self.params.CLASSES
        )

        return prepare_base_model_config

In [9]:

class PrepareBaseModel:
    def __init__(self, config: PrepareBaseModelConfig):
        self.config = config
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def get_base_model(self):
        self.model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
        
        if not self.config.params_include_top:
            self.model.classifier = nn.Identity()

        self.model.to(self.device)
        self.save_model(path=self.config.base_model_path, model=self.model)

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        if freeze_all:
            for param in model.parameters():
                param.requires_grad = False
        elif (freeze_till is not None) and (freeze_till > 0):
            for idx, child in enumerate(model.features):
                if idx < freeze_till:
                    for param in child.parameters():
                        param.requires_grad = False

        model.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(25088, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, classes),
            nn.Softmax(dim=1)
        )

        model.to(model.device if hasattr(model, 'device') else torch.device("cpu"))
        return model

    def update_base_model(self):
        self.full_model = self._prepare_full_model(
            model=self.model,
            classes=self.config.params_classes,
            freeze_all=True,
            freeze_till=None,
            learning_rate=self.config.params_learning_rate
        )
        self.save_model(self.config.updated_base_model_path, self.full_model)

    @staticmethod
    def save_model(path, model):
        torch.save(model, path)

In [10]:
try:
    config = ConfigurationManager()
    prepare_base_model_config = config.get_prepare_base_model_config()
    prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)
    prepare_base_model.get_base_model()
    prepare_base_model.update_base_model()
except Exception as e:
    raise e

[2025-05-26 13:22:44,278] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: config\config.yaml
[2025-05-26 13:22:44,280] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: params.yaml
[2025-05-26 13:22:44,282] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts
[2025-05-26 13:22:44,283] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts\prepare_base_model


# Prepare Callbaks

In [11]:
@dataclass(frozen=True)
class PrepareCallbacksConfig:
    root_dir: Path
    tensorboard_root_log_dir: Path
    checkpoint_model_filepath: Path

In [12]:
class ConfigurationManager:
    def __init__(
        self, 
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([Path(self.config.artifacts_root)])

    
    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        config = self.config.prepare_callbacks
        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)
        create_directories([
            Path(model_ckpt_dir),
            Path(config.tensorboard_root_log_dir)
        ])

        prepare_callback_config = PrepareCallbacksConfig(
            root_dir=Path(config.root_dir),
            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(config.checkpoint_model_filepath)
        )

        return prepare_callback_config

In [13]:
class PrepareCallback:
    def __init__(self, config:PrepareCallbacksConfig):
        self.config = config
        self.timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        self.tb_running_log_dir = os.path.join(
            self.config.tensorboard_root_log_dir,
            f"tb_logs_at_{self.timestamp}",
        )
        self._writer = None
        self.best_loss = float('inf')

    @property
    def _create_tb_callbacks(self):
        """Equivalent of TensorBoard callback in TensorFlow"""
        if self._writer is None:
            self._writer = SummaryWriter(log_dir=self.tb_running_log_dir)
        return self._writer

    @property
    def _create_ckpt_callbacks(self):
        """Equivalent of ModelCheckpoint in TensorFlow"""
        def checkpoint_callback(model, current_loss):
            if current_loss < self.best_loss:
                self.best_loss = current_loss
                torch.save(model.state_dict(), self.config.checkpoint_model_filepath)
                print(f"[Checkpoint] Saved model with loss {current_loss:.4f}")
        return checkpoint_callback

    def get_tb_ckpt_callbacks(self):
        return [
            self._create_tb_callbacks,
            self._create_ckpt_callbacks
        ]

    def close(self):
        if self._writer is not None:
            self._writer.close()

In [14]:
try:
    config = ConfigurationManager()
    prepare_callbacks_config = config.get_prepare_callback_config()
    prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
    callback_list = prepare_callbacks.get_tb_ckpt_callbacks()
    
except Exception as e:
    raise e

[2025-05-26 13:22:46,875] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: config\config.yaml
[2025-05-26 13:22:46,879] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: params.yaml
[2025-05-26 13:22:46,881] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts
[2025-05-26 13:22:46,882] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts\prepare_callbacks\checkpoint_dir
[2025-05-26 13:22:46,883] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts\prepare_callbacks\tensorboard_log_dir


# Training

In [3]:
@dataclass(frozen=True)
class TrainingConfig:
    root_dir: Path
    trained_model_path: Path
    updated_base_model_path: Path
    source_data_dir: Path
    params_epochs: int
    params_batch_size: int
    params_is_augmentation: bool
    params_image_size: list
    params_learning_rate: float


@dataclass(frozen=True)
class PrepareCallbacksConfig:
    root_dir: Path
    tensorboard_root_log_dir: Path
    checkpoint_model_filepath: Path

In [4]:
class ConfigurationManager:
    def __init__(
        self, 
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([Path(self.config.artifacts_root)])


    
    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        config = self.config.prepare_callbacks
        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)
        create_directories([
            Path(model_ckpt_dir),
            Path(config.tensorboard_root_log_dir)
        ])

        prepare_callback_config = PrepareCallbacksConfig(
            root_dir=Path(config.root_dir),
            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(config.checkpoint_model_filepath)
        )

        return prepare_callback_config
    



    def get_training_config(self) -> TrainingConfig:
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        create_directories([Path(training.root_dir)])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            trained_model_path=Path(training.trained_model_path),
            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
            source_data_dir=Path(self.config.data_ingestion.source_data_dir),
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_is_augmentation=params.AUGMENTATION,
            params_image_size=params.IMAGE_SIZE,
            params_learning_rate=params.LEARNING_RATE
        )

        return training_config

In [5]:
class PrepareCallback:
    def __init__(self, config:PrepareCallbacksConfig):
        self.config = config
        self.timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        self.tb_running_log_dir = os.path.join(
            self.config.tensorboard_root_log_dir,
            f"tb_logs_at_{self.timestamp}",
        )
        self._writer = None
        self.best_loss = float('inf')

    @property
    def _create_tb_callbacks(self):
        """Equivalent of TensorBoard callback in TensorFlow"""
        if self._writer is None:
            self._writer = SummaryWriter(log_dir=self.tb_running_log_dir)
        return self._writer

    @property
    def _create_ckpt_callbacks(self):
        """Equivalent of ModelCheckpoint in TensorFlow"""
        def checkpoint_callback(model, current_loss):
            if current_loss < self.best_loss:
                self.best_loss = current_loss
                torch.save(model.state_dict(), self.config.checkpoint_model_filepath)
                print(f"[Checkpoint] Saved model with loss {current_loss:.4f}")
        return checkpoint_callback

    def get_tb_ckpt_callbacks(self):
        return [
            self._create_tb_callbacks,
            self._create_ckpt_callbacks
        ]

    def close(self):
        if self._writer is not None:
            self._writer.close()

In [6]:

class Training:
    def __init__(self, config: TrainingConfig, callback_handler: PrepareCallback):
        self.config = config
        self.callback_handler = callback_handler
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = None

    def load_model(self):
        self.model = torch.load(self.config.updated_base_model_path, map_location=self.device, weights_only=False)
        self.model.to(self.device)


    def get_data_loaders(self):
        transform_list = [
            transforms.Resize(self.config.params_image_size[:-1]),
            transforms.ToTensor()
        ]

        if self.config.params_is_augmentation:
            transform_list = [
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(20),
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
            ] + transform_list

        transform = transforms.Compose(transform_list)

        dataset = datasets.ImageFolder(self.config.source_data_dir, transform=transform)

        val_size = int(0.2 * len(dataset))
        train_size = len(dataset) - val_size

        generator = torch.Generator().manual_seed(42)
        train_dataset, _ = random_split(dataset, [train_size, val_size], generator=generator)

        self.train_loader = DataLoader(
            train_dataset,
            batch_size=self.config.params_batch_size,
            shuffle=True
        )


    def train(self):
        self.load_model()
        self.get_data_loaders()
        tb_writer, checkpoint_callback = self.callback_handler.get_tb_ckpt_callbacks()

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(self.model.parameters(), lr=self.config.params_learning_rate)

        for epoch in range(self.config.params_epochs):
            self.model.train()
            running_loss = 0.0
            correct = 0
            total = 0

            loop = tqdm(self.train_loader, desc=f"Epoch [{epoch+1}/{self.config.params_epochs}]")
            for i, (inputs, labels) in enumerate(loop):
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                loop.set_postfix(loss=running_loss / (total if total else 1),
                                 acc=100. * correct / total if total else 0)
                
                step = epoch * len(self.train_loader) + i
                tb_writer.add_scalar('Loss/train', loss.item(), step)
                tb_writer.add_scalar('Accuracy/train', 100. * correct / total, step)

            print(f"Epoch {epoch+1}: Train Accuracy: {100. * correct / total:.2f}%")
            checkpoint_callback(self.model, running_loss)

        self.save_model()
        self.callback_handler.close()


    def save_model(self):
        torch.save(self.model, self.config.trained_model_path)

In [7]:
try:
    config = ConfigurationManager()
    prepare_callbacks_config = config.get_prepare_callback_config()
    prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)

    training_config = config.get_training_config()
    training = Training(config=training_config, callback_handler=prepare_callbacks)
    training.train()
    
except Exception as e:
    raise e

[2025-05-27 09:23:28,693] [15] [tumorClassifierLogger] - INFO - YAML file loaded successfully: config\config.yaml
[2025-05-27 09:23:28,696] [15] [tumorClassifierLogger] - INFO - YAML file loaded successfully: params.yaml
[2025-05-27 09:23:28,698] [28] [tumorClassifierLogger] - INFO - Created directory at: artifacts
[2025-05-27 09:23:28,699] [28] [tumorClassifierLogger] - INFO - Created directory at: artifacts\prepare_callbacks\checkpoint_dir
[2025-05-27 09:23:28,701] [28] [tumorClassifierLogger] - INFO - Created directory at: artifacts\prepare_callbacks\tensorboard_log_dir
[2025-05-27 09:23:28,702] [28] [tumorClassifierLogger] - INFO - Created directory at: artifacts\training


Epoch [1/1]: 100%|██████████| 13/13 [00:06<00:00,  2.12it/s, acc=60.6, loss=0.0429]


Epoch 1: Train Accuracy: 60.59%
[Checkpoint] Saved model with loss 8.7009


# Model Evaluation

In [8]:
@dataclass(frozen=True)
class EvaluationConfig:
    path_of_model: Path
    source_data: Path
    all_params: dict
    params_image_size: list
    params_batch_size: int

In [9]:
class ConfigurationManager:
    def __init__(
        self, 
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([Path(self.config.artifacts_root)])


    def get_validation_config(self) -> EvaluationConfig:
        eval_config = EvaluationConfig(
            path_of_model="artifacts/training/model.pth",
            source_data="brain-tumor-dataset",
            all_params=self.params,
            params_image_size=self.params.IMAGE_SIZE,
            params_batch_size=self.params.BATCH_SIZE
        )
        return eval_config
    

    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        config = self.config.prepare_callbacks
        model_ckpt_dir = os.path.dirname(config.checkpoint_model_filepath)
        create_directories([
            Path(model_ckpt_dir),
            Path(config.tensorboard_root_log_dir)
        ])

        prepare_callback_config = PrepareCallbacksConfig(
            root_dir=Path(config.root_dir),
            tensorboard_root_log_dir=Path(config.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(config.checkpoint_model_filepath)
        )

        return prepare_callback_config

In [10]:
class Evaluation:
    def __init__(self, config: EvaluationConfig, callback_handler: PrepareCallback):
        self.config = config
        self.callback_handler = callback_handler 
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = None
        self.val_loader = None
        self.latest_score = {}

    def load_model(self):
        self.model = torch.load(self.config.path_of_model, map_location=self.device, weights_only=False)
        self.model.to(self.device)

    def validation_data_loader(self):
        transform = transforms.Compose([
            transforms.Resize(self.config.params_image_size[:-1]),
            transforms.ToTensor()
        ])

        dataset = datasets.ImageFolder(self.config.source_data, transform=transform)
        val_size = int(0.2 * len(dataset))
        generator = torch.Generator().manual_seed(42)
        _, val_dataset = random_split(dataset, [len(dataset) - val_size, val_size], generator=generator)

        self.val_loader = DataLoader(
            val_dataset,
            batch_size=self.config.params_batch_size,
            shuffle=False
        )


    def evaluate(self):
        tb_writer, _ = self.callback_handler.get_tb_ckpt_callbacks()
        self.load_model()
        self.validation_data_loader()
        self.model.eval()

        criterion = nn.CrossEntropyLoss()

        correct = 0
        total = 0
        total_loss = 0


        with torch.no_grad():
            for inputs, labels in self.val_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                loss = criterion(outputs, labels)
                total_loss += loss.item() * labels.size(0)


                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = 100. * correct / total if total > 0 else 0.0
        avg_loss = total_loss / total if total > 0 else 0.0

        print(f"Validation Accuracy: {accuracy:.2f}% Validation Loss: {avg_loss:.4f}")

        self.latest_score = {
            "accuracy": accuracy,
            "loss": avg_loss
        }


        tb_writer.add_scalar("Loss/validation", avg_loss, 0)
        tb_writer.add_scalar("Accuracy/validation", accuracy, 0)
        tb_writer.flush()


        return accuracy
    

    def save_score(self):
        save_json(path=Path("scores.json"), data=self.latest_score)


In [11]:
try:
    config = ConfigurationManager()
    val_config = config.get_validation_config()
    prepare_callbacks_config = config.get_prepare_callback_config()
    prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
    evaluation = Evaluation(val_config, prepare_callbacks)
    evaluation.evaluate()
    evaluation.save_score()

except Exception as e:
   raise e

[2025-05-27 09:23:37,448] [15] [tumorClassifierLogger] - INFO - YAML file loaded successfully: config\config.yaml
[2025-05-27 09:23:37,451] [15] [tumorClassifierLogger] - INFO - YAML file loaded successfully: params.yaml
[2025-05-27 09:23:37,452] [28] [tumorClassifierLogger] - INFO - Created directory at: artifacts
[2025-05-27 09:23:37,453] [28] [tumorClassifierLogger] - INFO - Created directory at: artifacts\prepare_callbacks\checkpoint_dir
[2025-05-27 09:23:37,454] [28] [tumorClassifierLogger] - INFO - Created directory at: artifacts\prepare_callbacks\tensorboard_log_dir
Validation Accuracy: 68.00% Validation Loss: 0.6281
[2025-05-27 09:23:38,588] [42] [tumorClassifierLogger] - INFO - json file saved at: scores.json
