## 1. Loading Data

In [1]:
import os

import torch
import torch.nn as nn
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

try:
    from torchinfo import summary
except:
    print("[INFO] Couldn't find torchinfo... installing it.")
    !pip install -q torchinfo
    from torchinfo import summary

In [2]:
# Seed PyTorch's random number generators (including the CUDA one) with a fixed value
torch.manual_seed(0)
torch.cuda.manual_seed(0)

In [3]:
# Device-agnostic setup: use the GPU when PyTorch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [4]:
# os.cpu_count() returns None when the CPU count cannot be determined, and DataLoader
# requires an integer; fall back to 0 (batches are then loaded in the main process).
NUM_WORKERS = os.cpu_count() or 0
BATCH_SIZE = 32

def create_dataloader(data_dir: str,
                      transform: transforms.Compose,
                      batch_size: int=BATCH_SIZE,
                      num_workers: int=NUM_WORKERS,
                      shuffle=True):
    """Build a DataLoader over an image folder and report its class names.

    Args:
        data_dir: Directory handed to `datasets.ImageFolder`.
        transform: Transform applied to every loaded image.
        batch_size: Number of samples per batch.
        num_workers: Number of worker processes used by the DataLoader.
        shuffle: Whether the DataLoader shuffles the samples.

    Returns:
        A `(data_loader, class_names)` tuple, where `class_names` is the
        `classes` attribute of the underlying `ImageFolder`.
    """
    image_folder = datasets.ImageFolder(data_dir, transform=transform)

    loader_options = dict(
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=True,
    )

    return DataLoader(image_folder, **loader_options), image_folder.classes

In [5]:
from pathlib import Path

# Root of the Kaggle input dataset, and the folder inside it that contains the images
data_path = Path("/kaggle/input/banglalekhaisolated")
image_path = data_path.joinpath("BanglaLekhaIsolated")

In [6]:
# One sub-directory per dataset split, all located under image_path
train_dir, test_dir, val_dir = (
    image_path / split_name for split_name in ("train", "test", "val")
)

## 2. Importing Model

In [7]:
# import timm

# model = timm.create_model('maxvit_base_tf_512.in21k_ft_in1k', pretrained=True)

In [8]:
# Load EfficientNet-B0 with pretrained weights. The `weights` object is kept because a later
# cell calls `weights.transforms()` to obtain the preprocessing for the dataloaders.
weights = torchvision.models.EfficientNet_B0_Weights.DEFAULT # .DEFAULT = best available weights 
model = torchvision.models.efficientnet_b0(weights=weights)

#model # uncomment to output (it's very long)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 74.9MB/s]


In [9]:
# data_config = timm.data.resolve_model_data_config(model)
# train_transforms = timm.data.create_transform(**data_config, is_training=True)
# eval_transforms = timm.data.create_transform(**data_config, is_training=False)

In [10]:
# Reuse the preprocessing that ships with the pretrained weights.
# Training and evaluation share the very same transform object.
eval_transforms = weights.transforms()
train_transforms = eval_transforms

### 3. Transforming Data

In [11]:
# The training loader shuffles (the function default); validation and test keep a fixed order.
# Class names are taken from the training folder only.
train_dataloader, class_names = create_dataloader(
    data_dir=train_dir,
    transform=train_transforms,
)
val_dataloader, _ = create_dataloader(
    data_dir=val_dir,
    transform=eval_transforms,
    shuffle=False,
)
test_dataloader, _ = create_dataloader(
    data_dir=test_dir,
    transform=eval_transforms,
    shuffle=False,
)

### 4. Changing the model head

In [12]:
# '''Freezing the parameters'''
# for param in model.parameters():
#     param.requires_grad = False

In [13]:
model

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [14]:
# Replace the pretrained head with one sized to this dataset's classes.
output_shape = len(class_names)

# Read the feature width from the existing head's final Linear layer instead of hard-coding
# 1280, so the cell keeps working if the backbone variant changes. Re-running the cell is safe:
# the replacement head also ends in a Linear layer with the same in_features.
head_in_features = model.classifier[-1].in_features

model.classifier = torch.nn.Sequential(
    torch.nn.Dropout(p=0.2, inplace=True),
    torch.nn.Linear(in_features=head_in_features,
                    out_features=output_shape,
                    bias=True))

In [15]:
# model.head.fc = nn.Linear(in_features=768, out_features=len(class_names))
# model.head

In [16]:
# PATH = '/kaggle/input/banglacharrecognition/models/best_model.pth'
# model.load_state_dict(torch.load(PATH))

In [17]:
# Layer-by-layer overview of the network for a dummy input of the given shape
summary(
    model,
    input_size=(32, 3, 512, 512),  # (batch_size, color_channels, height, width)
    col_names=["input_size", "output_size", "num_params", "trainable"],
    col_width=20,
    row_settings=["var_names"],
    verbose=0,
)

Layer (type (var_name))                                      Input Shape          Output Shape         Param #              Trainable
EfficientNet (EfficientNet)                                  [32, 3, 512, 512]    [32, 84]             --                   True
├─Sequential (features)                                      [32, 3, 512, 512]    [32, 1280, 16, 16]   --                   True
│    └─Conv2dNormActivation (0)                              [32, 3, 512, 512]    [32, 32, 256, 256]   --                   True
│    │    └─Conv2d (0)                                       [32, 3, 512, 512]    [32, 32, 256, 256]   864                  True
│    │    └─BatchNorm2d (1)                                  [32, 32, 256, 256]   [32, 32, 256, 256]   64                   True
│    │    └─SiLU (2)                                         [32, 32, 256, 256]   [32, 32, 256, 256]   --                   --
│    └─Sequential (1)                                        [32, 32, 256, 256]   [32, 16, 256

In [18]:
# Move the model onto the selected device; this happens before the optimizer is created below
model = model.to(device)

### 5. Defining loss and optimizer

In [19]:
# Cross-entropy objective, optimised with Adam over every parameter of the model
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

### 6. Performance metrics

In [20]:
from torchmetrics import Accuracy, Precision, Recall, F1Score, ConfusionMatrix
from torchmetrics import Metric

class PerformanceMetric:
    """Bundle of multiclass torchmetrics objects that are updated, computed and reset together.

    Tracks accuracy, macro-averaged precision / recall / F1 and a confusion matrix.
    """

    def __init__(self, num_classes):
        """Create one metric object of each kind for `num_classes` classes."""
        self.num_classes = num_classes

        common = dict(task="multiclass", num_classes=num_classes)
        self.accuracy = Accuracy(**common)
        self.precision = Precision(average='macro', **common)
        self.recall = Recall(average='macro', **common)
        self.f1 = F1Score(average='macro', **common)
        self.confusion_matrix = ConfusionMatrix(**common)

    def _scalar_metrics(self):
        """Metrics whose computed value is reduced to a Python number, in reporting order."""
        return (self.accuracy, self.precision, self.recall, self.f1)

    def _all_metrics(self):
        """Every tracked metric, in the order they are updated and reset."""
        return self._scalar_metrics() + (self.confusion_matrix,)

    def update(self, predictions, targets):
        """Feed one batch of predictions and targets to every metric."""
        for metric in self._all_metrics():
            metric.update(predictions, targets)

    def compute(self):
        """Return `(accuracy, precision, recall, f1, confusion_matrix)`.

        The first four entries are Python numbers (via `.item()`); the confusion matrix
        is returned as a tensor moved to the CPU.
        """
        scalars = tuple(metric.compute().item() for metric in self._scalar_metrics())
        matrix = self.confusion_matrix.compute().cpu()

        return (*scalars, matrix)

    def reset(self):
        """Clear the accumulated state of every metric."""
        for metric in self._all_metrics():
            metric.reset()


### 7. Training

In [21]:
# !pip install torchmetrics

In [22]:
from tqdm.auto import tqdm
from typing import Dict, List, Tuple

In [23]:
def train_step(model: torch.nn.Module, 
               num_classes: int,
               dataloader: torch.utils.data.DataLoader, 
               loss_fn: torch.nn.Module, 
               optimizer: torch.optim.Optimizer,
               device: torch.device):
    """Run one training pass over `dataloader` and report loss and metrics.

    Args:
        model: Network to optimise; it is put into train mode.
        num_classes: Number of classes, used to build the metric tracker.
        dataloader: Yields `(inputs, targets)` batches.
        loss_fn: Loss applied to the model output and the targets.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        `(train_loss, train_acc, train_precision, train_recall, train_f1,
        train_confusion_matrix)`, where `train_loss` is the mean of the per-batch losses.
    """
    model.train()

    train_loss = 0
    metrics = PerformanceMetric(num_classes)

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)

        y_pred = model(X)

        loss = loss_fn(y_pred, y)
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Softmax is monotonic, so the arg-max of the raw logits is the predicted class;
        # skipping it avoids extra work and rounding-induced ties, and matches test_step.
        # detach() keeps the metric bookkeeping out of the autograd graph.
        y_pred_class = y_pred.detach().argmax(dim=1)

        metrics.update(y_pred_class.cpu(), y.cpu())

    train_loss = train_loss / len(dataloader)
    train_acc, train_precision, train_recall, train_f1, train_confusion_matrix = metrics.compute()

    return train_loss, train_acc, train_precision, train_recall, train_f1, train_confusion_matrix

In [24]:
def test_step(model: torch.nn.Module, 
              num_classes: int,
              dataloader: torch.utils.data.DataLoader, 
              loss_fn: torch.nn.Module,
              device: torch.device):
    """Evaluate `model` on every batch of `dataloader` without updating it.

    Args:
        model: Network to evaluate; it is put into eval mode.
        num_classes: Number of classes, used to build the metric tracker.
        dataloader: Yields `(inputs, targets)` batches.
        loss_fn: Loss applied to the model output and the targets.
        device: Device the batches are moved to before the forward pass.

    Returns:
        `(test_loss, test_acc, test_precision, test_recall, test_f1,
        test_confusion_matrix)`, where `test_loss` is the mean of the per-batch losses.
    """
    model.eval()

    running_loss = 0
    metrics = PerformanceMetric(num_classes)

    # inference_mode disables gradient tracking for the whole evaluation loop
    with torch.inference_mode():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            running_loss += loss_fn(logits, targets).item()

            predicted_labels = logits.argmax(dim=1)
            metrics.update(predicted_labels.cpu(), targets.cpu())

    mean_loss = running_loss / len(dataloader)
    accuracy, precision, recall, f1, confusion = metrics.compute()

    metrics.reset()

    return mean_loss, accuracy, precision, recall, f1, confusion

In [25]:
from pathlib import Path

def save_model(model: torch.nn.Module, target_dir: str, model_name: str):
    """Write the model's state_dict to `target_dir / model_name`.

    Args:
        model: Module whose `state_dict()` is saved.
        target_dir: Directory for the file; created (with parents) if it does not exist.
        model_name: File name; must end with '.pt' or '.pth'.

    Raises:
        AssertionError: If `model_name` has neither of the accepted suffixes.
    """
    destination_dir = Path(target_dir)
    destination_dir.mkdir(parents=True, exist_ok=True)

    has_valid_suffix = model_name.endswith((".pth", ".pt"))
    assert has_valid_suffix, "model_name should end with '.pt' or '.pth'"

    model_save_path = destination_dir / model_name

    print(f"[INFO] Saving model to: {model_save_path}")
    torch.save(obj=model.state_dict(), f=model_save_path)

In [26]:
def train(model: torch.nn.Module,
          num_classes: int,
          train_dataloader: torch.utils.data.DataLoader, 
          test_dataloader: torch.utils.data.DataLoader, 
          optimizer: torch.optim.Optimizer,
          loss_fn: torch.nn.Module,
          epochs: int,
          device: torch.device):
    """Train `model` for `epochs` epochs and checkpoint the weights with the lowest eval loss.

    Each epoch runs `train_step` on `train_dataloader` and `test_step` on `test_dataloader`.
    Whenever the evaluation loss improves on the best value seen so far, the current weights
    are written to 'models/best_model.pth' via `save_model`.

    Args:
        model: Network to train (updated in place).
        num_classes: Number of classes, forwarded to the step functions.
        train_dataloader: Batches used for optimisation.
        test_dataloader: Batches used for the per-epoch evaluation.
        optimizer: Optimizer forwarded to `train_step`.
        loss_fn: Loss forwarded to both step functions.
        epochs: Number of epochs to run.
        device: Device forwarded to both step functions.

    Returns:
        Dict mapping each metric name ("train_loss", "train_acc", ..., "test_f1") to a list
        with one value per epoch.
    """
    results = {"train_loss": [], "train_acc": [], "train_precision": [], "train_recall": [], "train_f1": [],
              "test_loss": [], "test_acc": [], "test_precision": [], "test_recall": [], "test_f1": []}

    # Start from +inf so the first epoch always counts as an improvement. Starting from 0
    # would never be beaten, because the loss is non-negative.
    best_loss = float("inf")
    checkpoint_saved = False

    for epoch in tqdm(range(epochs)):
        (train_loss, train_acc, train_precision,
         train_recall, train_f1, _train_confusion_matrix) = train_step(model=model,
                                                                       num_classes=num_classes,
                                                                       dataloader=train_dataloader,
                                                                       loss_fn=loss_fn,
                                                                       optimizer=optimizer,
                                                                       device=device)
        (test_loss, test_acc, test_precision,
         test_recall, test_f1, _test_confusion_matrix) = test_step(model=model,
                                                                   num_classes=num_classes,
                                                                   dataloader=test_dataloader,
                                                                   loss_fn=loss_fn,
                                                                   device=device)

        if test_loss < best_loss:
            best_loss = test_loss
            # Save immediately: keeping a reference to `model` would not snapshot anything,
            # since later epochs keep mutating the same object.
            save_model(model=model, target_dir='models', model_name='best_model.pth')
            checkpoint_saved = True

        print(f"Epoch: {epoch+1} | " 
                f"train_loss: {train_loss:.4f} | " 
                f"train_acc: {train_acc:.4f} | " 
                f"test_loss: {test_loss:.4f} | " 
                f"test_acc: {test_acc:.4f}")

        results["train_loss"].append(train_loss)
        results["train_acc"].append(train_acc)
        results["train_precision"].append(train_precision)
        results["train_recall"].append(train_recall)
        results["train_f1"].append(train_f1)
        results["test_loss"].append(test_loss)
        results["test_acc"].append(test_acc)
        results["test_precision"].append(test_precision)
        results["test_recall"].append(test_recall)
        results["test_f1"].append(test_f1)

    # Callers expect the checkpoint file to exist afterwards; if no epoch produced an
    # improvement (zero epochs, or a loss that is never comparable such as NaN), save the
    # final weights.
    if not checkpoint_saved:
        save_model(model=model, target_dir='models', model_name='best_model.pth')

    return results

In [27]:
from timeit import default_timer as timer 

EPOCHS = 15

# Wall-clock timing around the whole training run
start_time = timer()

results = train(
    model=model,
    num_classes=len(class_names),
    train_dataloader=train_dataloader,
    test_dataloader=val_dataloader,  # the validation split is used for the per-epoch evaluation
    optimizer=optimizer,
    loss_fn=loss_fn,
    epochs=EPOCHS,
    device=device,
)

end_time = timer()

# Report how long training took
print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")

  0%|          | 0/15 [00:00<?, ?it/s]

Epoch: 1 | train_loss: 0.5121 | train_acc: 0.8599 | test_loss: 0.2988 | test_acc: 0.9204
Epoch: 2 | train_loss: 0.3030 | train_acc: 0.9158 | test_loss: 0.2871 | test_acc: 0.9178
Epoch: 3 | train_loss: 0.2597 | train_acc: 0.9267 | test_loss: 0.2769 | test_acc: 0.9227
Epoch: 4 | train_loss: 0.2275 | train_acc: 0.9354 | test_loss: 0.2530 | test_acc: 0.9323
Epoch: 5 | train_loss: 0.2016 | train_acc: 0.9424 | test_loss: 0.2502 | test_acc: 0.9356
Epoch: 6 | train_loss: 0.1822 | train_acc: 0.9467 | test_loss: 0.2509 | test_acc: 0.9343
Epoch: 7 | train_loss: 0.1608 | train_acc: 0.9523 | test_loss: 0.2611 | test_acc: 0.9338
Epoch: 8 | train_loss: 0.1451 | train_acc: 0.9566 | test_loss: 0.2664 | test_acc: 0.9340
Epoch: 9 | train_loss: 0.1305 | train_acc: 0.9604 | test_loss: 0.2844 | test_acc: 0.9311
Epoch: 10 | train_loss: 0.1175 | train_acc: 0.9637 | test_loss: 0.2841 | test_acc: 0.9305
Epoch: 11 | train_loss: 0.1083 | train_acc: 0.9666 | test_loss: 0.2809 | test_acc: 0.9304
Epoch: 12 | train_l

### 7.1 Saving the metrics

In [28]:
# Opening in 'w' mode creates train_metrics.csv or truncates an existing one. Nothing is
# written here, so this cell only leaves an empty file; a later cell appends the rows.
import csv
file_path = 'train_metrics.csv'

with open(file_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)   

In [29]:
# import shutil

# source_file = '/kaggle/input/banglacharrecognition/train_metrics.csv'
# destination_file = '/kaggle/working/train_metrics.csv'

# shutil.copyfile(source_file, destination_file)

In [30]:
import csv
import os

headers = list(results.keys())

# Transpose the per-metric lists into one row per epoch
epoch_result = [list(col) for col in zip(*results.values())]

file_path = 'train_metrics.csv'

# Append mode keeps any rows already in the file. Write the header only when the file is
# missing or empty, so re-running this cell or appending to an earlier metrics file does
# not put a second header row in the middle of the data.
needs_header = not os.path.exists(file_path) or os.path.getsize(file_path) == 0

with open(file_path, 'a', newline='') as csvfile:
    writer = csv.writer(csvfile)
    if needs_header:
        writer.writerow(headers)
    writer.writerows(epoch_result)

### 8. Loading the best model

In [31]:
PATH = '/kaggle/working/models/best_model.pth'
# map_location remaps the stored tensors onto the current device, so a checkpoint written
# on a GPU can still be restored in a CPU-only session.
model.load_state_dict(torch.load(PATH, map_location=device))

<All keys matched successfully>

In [32]:
# Final evaluation of the restored checkpoint on the held-out test split
(test_loss,
 test_acc,
 test_precision,
 test_recall,
 test_f1,
 test_confusion_matrix) = test_step(
    model=model,
    num_classes=len(class_names),
    dataloader=test_dataloader,
    loss_fn=loss_fn,
    device=device,
)

In [33]:
# Persist the test-split results as a small plain-text report, one metric per line
PATH = 'test_metrics.txt'
with open(PATH, 'w') as fp:
    fp.writelines((
        f'Test Loss: {test_loss}\n',
        f'Test Accuracy: {test_acc}\n',
        f'Test Precision: {test_precision}\n',
        f'Test Recall: {test_recall}\n',
        f'Test F1 Score: {test_f1}\n',
        f'Test Confusion Matrix: \n{test_confusion_matrix.numpy()}\n',
    ))