# Brain Tumor Classification

# Import Required Packages

In [1]:
import os
# Step up one directory so the relative paths used by later cells resolve
# against the parent folder (the recorded output below shows the project root).
# NOTE: this is not idempotent - re-running the cell moves up one more level
# each time, so run it exactly once per kernel session.
os.chdir("../")
%pwd

'd:\\python-projects\\brain-tumor-classification'

In [2]:
from pathlib import Path
from dataclasses import dataclass
from src.utils.common import read_yaml, create_directories
from src.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from src.logging import logger

import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import DataLoader
from tqdm import tqdm


import time
from torch.utils.tensorboard import SummaryWriter


%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

# Data Ingestion

In [22]:
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings for the data-ingestion stage."""
    # Working directory of the stage; created by
    # ConfigurationManager.get_data_ingestion_config before this object is built.
    root_dir: Path
    # Location of the source data; DataIngestion checks that this path exists.
    source_data_dir: Path

The *DataIngestionConfig* class stores the two paths needed by the data-ingestion stage. Because the dataclass is declared with `frozen=True`, its instances are immutable, so these paths cannot be accidentally modified after creation.

In [26]:
class ConfigurationManager:
    """Read the project's YAML settings and hand out per-stage config objects.

    On construction the config and params files are parsed with ``read_yaml``
    and the artifacts root directory is created via ``create_directories``.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = Path(self.config.artifacts_root)
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Create the stage directory and return the data-ingestion settings.

        Returns:
            DataIngestionConfig built from the ``data_ingestion`` section.
        """
        section = self.config.data_ingestion

        # The stage's working directory must exist before anything uses it.
        create_directories([Path(section.root_dir)])

        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            source_data_dir=Path(section.source_data_dir),
        )

*ConfigurationManager* is a class that loads the project's YAML settings and exposes them as typed, per-stage configuration objects.

In [27]:
class DataIngestion:
    """Validate that the configured source data directory is usable.

    No data is copied or downloaded here; the stage only verifies the location
    named by ``config.source_data_dir`` and hands it back to the caller.
    """

    def __init__(self, config: "DataIngestionConfig"):
        """Store the stage configuration.

        Args:
            config: Object exposing ``source_data_dir`` as a ``pathlib.Path``.
        """
        self.config = config

    def initiate_data_ingestion(self):
        """Check the source data directory and return its path.

        Returns:
            The ``source_data_dir`` path from the configuration.

        Raises:
            FileNotFoundError: If nothing exists at ``source_data_dir``.
            NotADirectoryError: If the path exists but is not a directory
                (for example a stray file with the same name).
        """
        source_dir = self.config.source_data_dir

        logger.info("Checking Data Directory is exsisting")
        if not source_dir.exists():
            raise FileNotFoundError(f"Data directory not found at {source_dir}")
        # exists() is also true for regular files, which cannot serve as a
        # dataset folder - reject them explicitly instead of failing later.
        if not source_dir.is_dir():
            raise NotADirectoryError(f"Data path is not a directory: {source_dir}")
        logger.info("Data directory found and ready to use")
        return source_dir

The *DataIngestion* class handles the data-ingestion step: it checks that the source data directory exists and returns its path so that it can be used for further processing.

In [28]:
# Run the data-ingestion stage end to end. Errors are allowed to propagate
# as-is: wrapping the calls in `except Exception as e: raise e` added nothing
# but an extra traceback frame.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
# Trailing semicolon keeps the returned path from being echoed as cell output.
data_ingestion.initiate_data_ingestion();

[2025-05-05 15:49:23,304] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: config\config.yaml
[2025-05-05 15:49:23,307] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: params.yaml
[2025-05-05 15:49:23,308] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts
[2025-05-05 15:49:23,309] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts\data_ingestion
[2025-05-05 15:49:23,310] [6] [tumorClassifierLogger] - INFO - Checking Data Directory is exsisting
[2025-05-05 15:49:23,311] [9] [tumorClassifierLogger] - INFO - Data directory found and ready to use


# Prepare Base Model

In [3]:
@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Immutable settings for the prepare-base-model stage."""
    # Working directory of the stage; created by
    # ConfigurationManager.get_prepare_base_model_config.
    root_dir: Path
    # File that receives the state_dict of the freshly loaded backbone.
    base_model_path: Path
    # File that receives the state_dict after the classifier head is replaced
    # (and again after training).
    updated_base_model_path: Path
    # Taken from params.IMAGE_SIZE; not read by PrepareBaseModel in this notebook.
    params_image_size: list
    # Learning rate passed to the SGD optimizer in PrepareBaseModel.train_model.
    params_learning_rate: float
    # When False, the backbone's classifier is replaced by nn.Identity.
    params_include_top: bool
    # Taken from params.WEIGHTS; not read by PrepareBaseModel in this notebook.
    params_weights: str
    # Number of output units of the final Linear layer.
    params_classes: int

In [7]:
class ConfigurationManager:
    """Read the project's YAML settings for the prepare-base-model stage.

    NOTE: this cell rebinds the name ``ConfigurationManager``; the definition
    from the data-ingestion section is shadowed once this cell has run.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = Path(self.config.artifacts_root)
        create_directories([artifacts_root])

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Create the stage directory and return the base-model settings.

        Paths come from the ``prepare_base_model`` section of the config file;
        the ``params_*`` values come from the params file.

        Returns:
            PrepareBaseModelConfig populated from both YAML files.
        """
        section = self.config.prepare_base_model
        params = self.params

        # The stage's working directory must exist before models are saved.
        create_directories([Path(section.root_dir)])

        return PrepareBaseModelConfig(
            root_dir=Path(section.root_dir),
            base_model_path=Path(section.base_model_path),
            updated_base_model_path=Path(section.updated_base_model_path),
            params_image_size=params.IMAGE_SIZE,
            params_learning_rate=params.LEARNING_RATE,
            params_include_top=params.INCLUDE_TOP,
            params_weights=params.WEIGHTS,
            params_classes=params.CLASSES,
        )

In [8]:

class PrepareBaseModel:
    """Build, adapt and fine-tune a VGG16 backbone for image classification.

    Typical order of calls: ``get_base_model`` -> ``update_base_model`` ->
    ``train_model``. Models are persisted as ``state_dict`` files at the paths
    given by the configuration.
    """

    def __init__(self, config: "PrepareBaseModelConfig"):
        """Store the configuration and pick the compute device.

        Args:
            config: Stage settings (paths, learning rate, class count, ...).
        """
        self.config = config
        # Use the GPU when one is visible; models and batches are moved here.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def get_base_model(self):
        """Load the pretrained VGG16 backbone and save its state_dict.

        When ``params_include_top`` is False the stock classifier is replaced
        by ``nn.Identity`` so only the feature extractor remains.
        """
        # NOTE(review): config.params_weights is not consulted here; the
        # IMAGENET1K_V1 weights are always loaded - confirm this is intended.
        self.model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

        if not self.config.params_include_top:
            self.model.classifier = nn.Identity()

        self.model.to(self.device)
        self.save_model(path=self.config.base_model_path, model=self.model)

    @staticmethod
    def _prepare_full_model(model, classes, freeze_all, freeze_till, learning_rate):
        """Freeze backbone layers and attach a new classification head.

        Args:
            model: Network exposing ``features`` and ``classifier`` attributes.
            classes: Number of output units of the new head.
            freeze_all: If true, every existing parameter is frozen.
            freeze_till: Otherwise, if a positive int, the first ``freeze_till``
                children of ``model.features`` are frozen.
            learning_rate: Unused here; kept so existing callers keep working.

        Returns:
            The same ``model`` object, modified in place. The head outputs raw
            logits; apply ``softmax`` to them when probabilities are needed.
        """
        # Remember where the model currently lives so the new head ends up on
        # the same device. nn.Module has no `.device` attribute, so probing
        # `model.device` always fell back to CPU and undid the earlier `.to()`.
        try:
            device = next(model.parameters()).device
        except StopIteration:
            device = torch.device("cpu")

        if freeze_all:
            for param in model.parameters():
                param.requires_grad = False
        elif (freeze_till is not None) and (freeze_till > 0):
            for idx, child in enumerate(model.features):
                if idx < freeze_till:
                    for param in child.parameters():
                        param.requires_grad = False

        # The new layers are created after freezing, so they stay trainable.
        # No Softmax at the end: nn.CrossEntropyLoss (used in train_model)
        # expects unnormalized logits and applies log-softmax itself; feeding
        # it probabilities squashes the gradients. Softmax had no parameters,
        # so the state_dict keys (classifier.1/4/7) are unchanged.
        model.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(25088, 4096),  # 25088 = 512 * 7 * 7 flattened features
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, classes),
        )

        model.to(device)
        return model

    def update_base_model(self):
        """Freeze the backbone, attach the new head and save the result.

        Requires ``get_base_model`` to have been called first (reads
        ``self.model``).
        """
        self.full_model = self._prepare_full_model(
            model=self.model,
            classes=self.config.params_classes,
            freeze_all=True,
            freeze_till=None,
            learning_rate=self.config.params_learning_rate
        )
        self.save_model(self.config.updated_base_model_path, self.full_model)

    @staticmethod
    def save_model(path, model):
        """Write ``model.state_dict()`` to ``path`` with ``torch.save``."""
        torch.save(model.state_dict(), path)

    def train_model(self, train_loader: DataLoader, val_loader: DataLoader = None, epochs: int = 5):
        """Train ``self.full_model`` with SGD and cross-entropy loss.

        Args:
            train_loader: Iterable of ``(inputs, labels)`` training batches.
            val_loader: Optional iterable of validation batches; evaluated
                after every epoch when given.
            epochs: Number of passes over ``train_loader``.

        The trained weights are saved to ``config.updated_base_model_path``.
        Requires ``update_base_model`` to have been called first.
        """
        model = self.full_model.to(self.device)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=self.config.params_learning_rate)

        for epoch in range(epochs):
            model.train()
            running_loss = 0.0  # sum of per-sample losses seen this epoch
            correct = 0
            total = 0

            loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{epochs}]", leave=False)
            for inputs, labels in loop:
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                optimizer.zero_grad()
                outputs = model(inputs)

                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                # criterion returns the batch mean; weight it by the batch size
                # so that dividing by `total` yields a true per-sample average.
                batch_size = labels.size(0)
                running_loss += loss.item() * batch_size
                predicted = outputs.detach().argmax(dim=1)
                total += batch_size
                correct += (predicted == labels).sum().item()

                loop.set_postfix(loss=running_loss / total if total else 0.0,
                                 acc=100. * correct / total if total else 0.0)

            # Guard against an empty loader instead of dividing by zero.
            epoch_loss = running_loss / total if total else 0.0
            epoch_acc = 100. * correct / total if total else 0.0
            print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")

            # Optional validation. Compare with None explicitly: the truth
            # value of a DataLoader goes through len(), which is 0 for an
            # empty loader and may be undefined for iterable-style datasets.
            if val_loader is not None:
                self.evaluate_model(model, val_loader)

        self.save_model(self.config.updated_base_model_path, model)

    def evaluate_model(self, model, val_loader):
        """Compute and print top-1 accuracy of ``model`` on ``val_loader``.

        Args:
            model: Module to evaluate; it is switched to eval mode.
            val_loader: Iterable of ``(inputs, labels)`` batches.

        Returns:
            Accuracy in percent (0.0 when ``val_loader`` yields no samples).
        """
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = 100. * correct / total if total else 0.0
        print(f"Validation Accuracy: {accuracy:.2f}%")
        return accuracy

In [13]:
# Run the prepare-base-model stage: load the backbone, then attach the new
# head. Errors propagate as-is; the former `except Exception as e: raise e`
# wrapper only added an extra traceback frame.
config = ConfigurationManager()
prepare_base_model_config = config.get_prepare_base_model_config()
prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)
prepare_base_model.get_base_model()
prepare_base_model.update_base_model()

[2025-05-17 23:52:00,297] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: config\config.yaml
[2025-05-17 23:52:00,300] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: params.yaml
[2025-05-17 23:52:00,302] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts
[2025-05-17 23:52:00,303] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts\prepare_base_model
Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\Amir/.cache\torch\hub\checkpoints\vgg16-397923af.pth


100%|██████████| 528M/528M [17:33<00:00, 525kB/s] 


# Prepare Callbacks

In [14]:
@dataclass(frozen=True)
class PrepareCallbacksConfig:
    """Immutable settings for the prepare-callbacks stage."""
    # Root directory of the stage as given in the config file.
    root_dir: Path
    # Parent folder under which PrepareCallback creates one
    # "tb_logs_at_<timestamp>" log directory per instance.
    tensorboard_root_log_dir: Path
    # File the checkpoint callback writes the best model's state_dict to.
    checkpoint_model_filepath: Path

In [15]:
class ConfigurationManager:
    """Read the project's YAML settings for the prepare-callbacks stage.

    NOTE: this cell rebinds the name ``ConfigurationManager``; the definitions
    from the earlier sections are shadowed once this cell has run.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = Path(self.config.artifacts_root)
        create_directories([artifacts_root])

    def get_prepare_callback_config(self) -> PrepareCallbacksConfig:
        """Create the callback directories and return the callback settings.

        The folder that will hold the checkpoint file and the TensorBoard root
        log directory are both created before the config object is built.

        Returns:
            PrepareCallbacksConfig built from the ``prepare_callbacks`` section.
        """
        section = self.config.prepare_callbacks

        # Only the checkpoint's parent folder is created; the file itself is
        # written later by the checkpoint callback.
        checkpoint_dir = os.path.dirname(section.checkpoint_model_filepath)
        required_dirs = [
            Path(checkpoint_dir),
            Path(section.tensorboard_root_log_dir),
        ]
        create_directories(required_dirs)

        return PrepareCallbacksConfig(
            root_dir=Path(section.root_dir),
            tensorboard_root_log_dir=Path(section.tensorboard_root_log_dir),
            checkpoint_model_filepath=Path(section.checkpoint_model_filepath),
        )

In [None]:
class PrepareCallback:
    """Provide a TensorBoard writer and a best-loss checkpoint function.

    Each instance logs to its own ``tb_logs_at_<timestamp>`` folder under the
    configured TensorBoard root and remembers the lowest loss seen so far.
    """

    def __init__(self, config:PrepareCallbacksConfig):
        """Store the config and derive this run's TensorBoard directory.

        Args:
            config: Callback settings (log root and checkpoint file path).
        """
        self.config = config
        self.timestamp = time.strftime("%Y-%m-%d-%H-%M-%S")
        run_folder = f"tb_logs_at_{self.timestamp}"
        self.tb_running_log_dir = os.path.join(self.config.tensorboard_root_log_dir, run_folder)
        # The writer is created on first access, not here.
        self._writer = None
        self.best_loss = float('inf')

    @property
    def _create_tb_callbacks(self):
        """Return the SummaryWriter for this run, creating it on first use."""
        writer = self._writer
        if writer is None:
            writer = SummaryWriter(log_dir=self.tb_running_log_dir)
            self._writer = writer
        return writer

    @property
    def _create_ckpt_callbacks(self):
        """Return a function that saves the model whenever the loss improves."""
        def checkpoint_callback(model, current_loss):
            # Anything that is not a strict improvement is ignored.
            if not (current_loss < self.best_loss):
                return
            self.best_loss = current_loss
            torch.save(model.state_dict(), self.config.checkpoint_model_filepath)
            print(f"[Checkpoint] Saved model with loss {current_loss:.4f}")

        return checkpoint_callback

    def get_tb_ckpt_callbacks(self):
        """Return ``[summary_writer, checkpoint_function]`` in that order."""
        tb_writer = self._create_tb_callbacks
        ckpt_function = self._create_ckpt_callbacks
        return [tb_writer, ckpt_function]

    def close(self):
        """Close the TensorBoard writer if one has been created."""
        writer = self._writer
        if writer is None:
            return
        writer.close()

In [18]:
# Build the callbacks: `callback_list` holds the TensorBoard writer followed
# by the checkpoint function. Errors propagate as-is; the former
# `except Exception as e: raise e` wrapper only added an extra traceback frame.
# Call `prepare_callbacks.close()` once the writer is no longer needed.
config = ConfigurationManager()
prepare_callbacks_config = config.get_prepare_callback_config()
prepare_callbacks = PrepareCallback(config=prepare_callbacks_config)
callback_list = prepare_callbacks.get_tb_ckpt_callbacks()

[2025-05-18 13:08:52,151] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: config\config.yaml
[2025-05-18 13:08:52,155] [14] [tumorClassifierLogger] - INFO - YAML file loaded successfully: params.yaml
[2025-05-18 13:08:52,157] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts
[2025-05-18 13:08:52,158] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts\prepare_callbacks\checkpoint_dir
[2025-05-18 13:08:52,159] [27] [tumorClassifierLogger] - INFO - Created directory at: artifacts\prepare_callbacks\tensorboard_log_dir
