In [1]:
import os
os.chdir("../")

In [2]:
import torch
from pathlib import Path
from dataclasses import dataclass
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from src.vision_Transformer.constants import *
from src.vision_Transformer.utils.common import read_yaml, create_directories

In [16]:
@dataclass(frozen = True)
class ModelTrainerConfig:
    root_dir : Path
    train_accuracy : Path
    train_loss : Path
    data_dir : Path


    batch_size : int
    epochs : int 
    learning_rate : float
    patch_size : int
    num_classes : int
    image_size : int 
    channels : int
    embed_dim : int
    num_heads: int
    depth : int
    mlp_dim : int
    dropout_rate : float
    weight_decay : float


In [18]:
class ConfigurationManager:
    def __init__(self, config_file_path = CONFIG_FILE_PATH ,params_file_path = PARAMS_FILE_PATH):
        self.config = read_yaml(config_file_path)
        self.params = read_yaml(params_file_path)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:

        config = self.config.model_trainer
        params = self.params.TrainingArguments

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir= config.root_dir,
            train_accuracy = config.train_accuracy,
            train_loss = config.train_loss,
            data_dir = config.data_dir,

            batch_size= params.BATCH_SIZE,
            epochs = params.EPOCHS,
            learning_rate = params.LEARNING_RATE,
            patch_size = params.PATCH_SIZE,
            num_classes = params.NUM_CLASSES,
            image_size = params.IMAGE_SIZE,
            channels = params.CHANNELS,
            embed_dim  = params.EMBED_DIM,
            num_heads = params.NUM_HEADS,
            depth = params.DEPTH,
            mlp_dim = params.MLP_DIM,
            dropout_rate = params.DROPOUT_RATE,
            weight_decay  = params.WEIGHT_DECAY
        )
        return model_trainer_config

In [19]:
from torch.utils.data import DataLoader

In [20]:
from src.vision_Transformer.logging import logger

In [35]:
class DataTransformation:
    def __init__(self , config : ModelTrainerConfig):
        self.config = config

    def data_augmentation(self):
        self.after_transforms = transforms.Compose([
            transforms.RandomCrop(32 , padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.2 ,contrast= 0.2, saturation=0.2 , hue=0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5]*3 , std = [0.5]*3)
        ])
    
    def transformed_dataset(self):
        transformed_train_dataset = datasets.CIFAR10(
            root = self.config.data_dir,
            train = True,
            download= False,
            transform= self.after_transforms,
        )
        logger.info(f"Train Dataset Transformed Successfully")
        print(f"Train Dataset Transformed Successfully")

        return transformed_train_dataset

In [36]:
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()

data_transfromation = DataTransformation(model_trainer_config)
data_transfromation.data_augmentation()

train_dataset= data_transfromation.transformed_dataset()

[2025-08-09 13:37:57,183 : INFO : common  : yaml file config\config.yaml was read succesfully]
[2025-08-09 13:37:57,188 : INFO : common  : yaml file params.yaml was read succesfully]
[2025-08-09 13:37:57,189 : INFO : common  : Created directory at : artifacts]
[2025-08-09 13:37:57,192 : INFO : common  : Created directory at : artifacts/model/trained_model]
[2025-08-09 13:37:57,899 : INFO : 3539883775  : Train Dataset Transformed Successfully]
Train Dataset Transformed Successfully


In [41]:
from src.vision_Transformer.Components.ViT_Component.Vision_Transformer_Class import Vision_Transformer_Class
from tqdm.auto import tqdm
import torch.optim as optim
import torch.nn as nn
import json

In [42]:
class Model_trainer:
    def __init__(self, config: ModelTrainerConfig , train_dataset):
        self.config = config
        
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        self.train_dataset = train_dataset

        self.train_loader = DataLoader(train_dataset , batch_size = self.config.batch_size ,shuffle = True ,pin_memory= True)

        self.model = Vision_Transformer_Class(
            image_size = self.config.image_size,
            patch_size = self.config.patch_size,
            in_channels = self.config.channels,
            num_classes = self.config.num_classes,
            embed_dim = self.config.embed_dim,
            num_heads = self.config.num_heads,  # 8
            depth = self.config.depth,      # 6
            mlp_dim = self.config.mlp_dim,
            dropout_rate = self.config.dropout_rate
        ).to(self.device)


    # ------------------------------------------------------
        self.criterion = nn.CrossEntropyLoss(label_smoothing= 0.1)
        self.optimizer = optim.AdamW(self.model.parameters() , lr = float(self.config.learning_rate) , weight_decay= self.config.weight_decay)
        self.scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer= self.optimizer , T_max= self.config.epochs)
    
    # ---------------------------------------------------------------------------
    def show_model(self):
        print("\n\n------------------------------->Model Configuration<------------------------------------")
        print("\n", self.model)

        print("\n\n\n", self.device)
        
    def train(self):
        #set the model to the training mode 
        self.model.train()
        

        total_loss , correct_prediction = 0 , 0

        for x , y in self.train_loader:
            x , y = x.to(self.device) , y.to(self.device)

            self.optimizer.zero_grad()

            output = self.model(x)

            loss = self.criterion(output , y)

            loss.backward()
            self.optimizer.step()


            total_loss += loss.item() * x.size(0)

            correct_prediction += (output.argmax(1) == y).sum().item()

        return total_loss/len(self.train_loader.dataset) , correct_prediction / len(self.train_loader.dataset)
    

    def model_train_pipeline(self):
        train_logs = []

        train_accuracy_file = self.config.train_accuracy
        os.makedirs(os.path.dirname(train_accuracy_file) , exist_ok= True)


        for epoch in tqdm(range(self.config.epochs)):

            train_loss , train_acc = self.train()

            log = {
            "epoch": f" {epoch+1} /{self.config.epochs}",
            "train_loss": f" {train_loss:.4f}",
            "train_acc": f" {train_acc:.4f}%"
            }

            train_logs.append(log)

            print(f"Epoch: {epoch+1}/{self.config.epochs}, Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f}%")

        with open(train_accuracy_file ,'w') as f:
                json.dump(log , f)


    def save_model(self):
        # ===============================
        # 1. Save model parameters only (state_dict)
        # ===============================

        state_dict_dir = self.config.root_dir
        os.makedirs(state_dict_dir , exist_ok= True)

        state_dict_path = os.path.join(state_dict_dir , "model_weights.pth")
        torch.save(self.model.state_dict() , state_dict_path)
        print(f"Saved model weights (state_dict) to: {state_dict_path}")  

                
        # ===============================
        # 2. Save full model (architecture + parameters)
        # ===============================  
        full_model_dir = self.config.root_dir
        os.makedirs(full_model_dir , exist_ok= True)
        
        full_model_path = os.path.join(full_model_dir , "complete_model.pth")
        torch.save(self.model , full_model_path)
        print(f"Saved full model to: {full_model_path}")

In [44]:
try:
    config = ConfigurationManager()

    model_trainer_config = config.get_model_trainer_config()
    data_transfromation = DataTransformation(model_trainer_config)
    data_transfromation.data_augmentation()

    train_dataset = data_transfromation.transformed_dataset()

    model_trainer = Model_trainer(model_trainer_config , train_dataset=train_dataset )

    model_trainer.show_model()

    print("\n-----------------------Model Training----------------------------")
    model_trainer.model_train_pipeline()

    print("\n-----------------------Model Training Completed----------------------------")
    model_trainer.save_model()

except Exception as e:
  raise e

[2025-08-09 14:18:59,400 : INFO : common  : yaml file config\config.yaml was read succesfully]
[2025-08-09 14:18:59,406 : INFO : common  : yaml file params.yaml was read succesfully]
[2025-08-09 14:18:59,408 : INFO : common  : Created directory at : artifacts]
[2025-08-09 14:18:59,412 : INFO : common  : Created directory at : artifacts/model/trained_model]
[2025-08-09 14:19:00,134 : INFO : 3539883775  : Train Dataset Transformed Successfully]
Train Dataset Transformed Successfully


------------------------------->Model Configuration<------------------------------------

 Vision_Transformer_Class(
  (patch_embedding): PatchEmbedding(
    (projection): Conv2d(3, 256, kernel_size=(4, 4), stride=(4, 4))
  )
  (encoder_layer): Sequential(
    (0): Transformer_Encoder_Layer(
      (normalization_layer_1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (multi_head_attention): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=25

  4%|▍         | 1/25 [02:26<58:29, 146.22s/it]

Epoch: 1/25, Train loss: 2.0093, Train acc: 0.2919%


  4%|▍         | 1/25 [03:33<1:25:35, 213.98s/it]


KeyboardInterrupt: 