# CCT model

## Overview

* Uses a pretrained model from this repo: https://github.com/SHI-Labs/Compact-Transformers 
* The main CCT Python file and its utils were copied into this project because the upstream repo is not distributed as a package that can simply be imported 
* Currently using the CCT model with ImageNet-pretrained weights

## Model choice
* cct_14_7x2_224
* cct_14_7x2_384

In [1]:
%pip install timm torchvision pandas pillow


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 26.0 -> 26.0.1
[notice] To update, run: c:\Users\jessl\SMU (Master)\computer_vision\.venv\Scripts\python.exe -m pip install --upgrade pip


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from PIL import Image
from pathlib import Path
from torchvision import transforms
import timm

  from .autonotebook import tqdm as notebook_tqdm


# Load dataset

In [3]:
class PlantDataset(Dataset):
    """Image-classification dataset backed by a DataFrame of file paths and labels.

    Each row of ``df`` must provide:
      * ``filepath_rel``  - image path relative to ``repo_root``
      * ``canonical_id``  - class label, convertible to ``int``
    """

    def __init__(self, df, repo_root, img_size=224, transform=None):
        """Store the split table and build the image transform.

        Args:
            df: DataFrame describing the split; its index is reset so that
                positional access in ``__getitem__`` is contiguous.
            repo_root: Directory that ``filepath_rel`` entries are relative to.
            img_size: Side length used by the default transform's resize.
                Ignored when ``transform`` is supplied.
            transform: Optional callable applied to the loaded RGB PIL image.
                When ``None``, a resize -> tensor -> normalize pipeline is used.
        """
        self.df = df.reset_index(drop=True)
        self.repo_root = Path(repo_root)

        if transform is None:
            self.transform = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]
                )
            ])
        else:
            self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the split."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(transformed_image, label)``.

        Raises:
            FileNotFoundError: if the image file referenced by the row is missing.
        """
        row = self.df.iloc[idx]

        img_path = self.repo_root / row["filepath_rel"]
        if not img_path.exists():
            raise FileNotFoundError(f"Image not found: {img_path}")

        # Image.open is lazy and keeps the underlying file open; the context
        # manager closes it as soon as the RGB copy has been materialised,
        # instead of leaving the handle to the garbage collector.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        img = self.transform(img)

        label = int(row["canonical_id"])
        return img, label


In [4]:
# The split CSVs live under <repo_root>/data/splits, where repo_root is the
# parent of the notebook's working directory.
repo_root = Path.cwd().parent
splits_dir = repo_root.joinpath("data", "splits")

# Read the train / val / test split tables (same order as the file names).
train_csv, val_csv, test_csv = (
    pd.read_csv(splits_dir / csv_name)
    for csv_name in ("pv_train.csv", "pv_val.csv", "pv_test.csv")
)

# Wrap every split in a PlantDataset using its default transform.
train_dataset, val_dataset, test_dataset = (
    PlantDataset(split_df, repo_root)
    for split_df in (train_csv, val_csv, test_csv)
)

# Only the training loader shuffles; val and test keep file order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(split_ds, batch_size=32, shuffle=False)
    for split_ds in (val_dataset, test_dataset)
)

# Quick sanity check: pull one training batch and show its shape and labels
imgs, labels = next(iter(train_loader))
print(imgs.shape)
print(labels[:10])

torch.Size([32, 3, 224, 224])
tensor([ 4, 14, 25, 10, 23,  7,  0, 17,  0, 25])


# Set up model

In [5]:
# SECURITY NOTE(review): the assignment below swaps the standard library's
# default HTTPS context factory for the *unverified* one, so HTTPS requests
# that rely on that default skip certificate verification for the rest of
# this kernel session.
# Presumably added so the pretrained-weight download in the next cell
# succeeds - confirm, and prefer fixing the local CA certificates instead.
import ssl
# Relies on private (underscore-prefixed) attributes of the ssl module.
ssl._create_default_https_context = ssl._create_unverified_context

In [6]:
from cct.cct import cct_14_7x2_224

# Build the CCT model with pretrained weights and a 26-way classification head.
model = cct_14_7x2_224(
    pretrained=True,
    progress=True,
    num_classes=26,
)

# List every parameter name so the layer naming scheme can be inspected.
for param_name, _ in model.named_parameters():
    print(param_name)

# Count the parameter tensors that currently require gradients.
trainable_params = list(filter(lambda p: p.requires_grad, model.parameters()))
print("Trainable parameters:", len(trainable_params))



Removing classifier.fc.weight, number of classes has changed.
Removing classifier.fc.bias, number of classes has changed.


tokenizer.conv_layers.0.0.weight
tokenizer.conv_layers.1.0.weight
classifier.positional_emb
classifier.attention_pool.weight
classifier.attention_pool.bias
classifier.blocks.0.pre_norm.weight
classifier.blocks.0.pre_norm.bias
classifier.blocks.0.self_attn.qkv.weight
classifier.blocks.0.self_attn.proj.weight
classifier.blocks.0.self_attn.proj.bias
classifier.blocks.0.linear1.weight
classifier.blocks.0.linear1.bias
classifier.blocks.0.norm1.weight
classifier.blocks.0.norm1.bias
classifier.blocks.0.linear2.weight
classifier.blocks.0.linear2.bias
classifier.blocks.1.pre_norm.weight
classifier.blocks.1.pre_norm.bias
classifier.blocks.1.self_attn.qkv.weight
classifier.blocks.1.self_attn.proj.weight
classifier.blocks.1.self_attn.proj.bias
classifier.blocks.1.linear1.weight
classifier.blocks.1.linear1.bias
classifier.blocks.1.norm1.weight
classifier.blocks.1.norm1.bias
classifier.blocks.1.linear2.weight
classifier.blocks.1.linear2.bias
classifier.blocks.2.pre_norm.weight
classifier.blocks.2.pr

In [7]:
# Freeze every parameter except those whose name contains "classifier.fc",
# so only those tensors receive gradient updates.
for name, param in model.named_parameters():
    param.requires_grad = "classifier.fc" in name

# Collect the tensors that are still trainable after freezing.
trainable_params = list(filter(lambda p: p.requires_grad, model.parameters()))

print("Trainable parameters:", len(trainable_params))

# Loss and optimizer; the optimizer only sees the unfrozen tensors.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(trainable_params, lr=1e-4)

Trainable parameters: 2


In [10]:
# Training configuration and device placement.
EPOCHS = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = model.to(device)

# Checkpoints go to <repo_root>/model/checkpoints. parents=True creates the
# intermediate "model" directory as well, so this works on a fresh checkout.
ckpt_dir = repo_root / "model" / "checkpoints"
ckpt_dir.mkdir(parents=True, exist_ok=True)

# Bookkeeping consumed by the training loop.
best_val_acc = 0.0
start_epoch = 0


In [None]:
EPOCHS = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


def _run_epoch(model, loader, criterion, device, optimizer=None, desc=""):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the pass is a training pass: the model is put
    in train mode and every batch performs a backward pass and optimizer step.
    Without an optimizer the pass is an evaluation pass: eval mode, gradients
    disabled, no parameter updates.

    Args:
        model: Network mapping an image batch to class logits.
        loader: Iterable yielding ``(imgs, labels)`` batches.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.
        optimizer: Optimizer for a training pass, or ``None`` to evaluate.
        desc: Text shown on the progress bar.

    Raises:
        ValueError: if ``loader`` yields no samples (metrics would be undefined).
    """
    is_train = optimizer is not None
    model.train(is_train)  # train(False) is the same as eval()

    running_loss = 0.0
    correct = 0
    total = 0

    progress = tqdm(loader, desc=desc, leave=False)

    with torch.set_grad_enabled(is_train):
        for imgs, labels in progress:
            imgs = imgs.to(device)
            labels = labels.to(device)

            if is_train:
                optimizer.zero_grad()

            outputs = model(imgs)
            loss = criterion(outputs, labels)

            if is_train:
                loss.backward()
                optimizer.step()

            # Weight the batch-mean loss by batch size so the epoch average
            # is per-sample even when the last batch is smaller.
            running_loss += loss.item() * imgs.size(0)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)

            progress.set_postfix(
                loss=running_loss / total,
                acc=correct / total
            )

    if total == 0:
        raise ValueError(f"{desc or 'epoch'}: data loader produced no samples")

    return running_loss / total, correct / total


for epoch in range(start_epoch, EPOCHS):

    # TRAIN
    train_loss, train_acc = _run_epoch(
        model, train_loader, criterion, device,
        optimizer=optimizer,
        desc=f"Epoch {epoch+1}/{EPOCHS} [Train]",
    )

    # VALIDATION
    val_loss, val_acc = _run_epoch(
        model, val_loader, criterion, device,
        desc=f"Epoch {epoch+1}/{EPOCHS} [Val]",
    )

    # CHECKPOINTING
    is_best = val_acc > best_val_acc
    if is_best:
        best_val_acc = val_acc

    checkpoint = {
        "epoch": epoch + 1,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "val_acc": val_acc,
        # Stored so a resumed run can restore the best-so-far threshold.
        "best_val_acc": best_val_acc,
    }

    # Save latest
    torch.save(checkpoint, ckpt_dir / "last.pth")

    # Save best
    if is_best:
        torch.save(checkpoint, ckpt_dir / "best.pth")

    print(
        f"Epoch [{epoch+1}/{EPOCHS}] "
        f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
        f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f} "
        f"{'BEST' if is_best else ''}"
    )


                                                                                            

Epoch [1/10] Train Loss: 1.4437, Train Acc: 0.6720 | Val Loss: 0.7728, Val Acc: 0.8419 BEST


Epoch 2/10 [Train]:  41%|████▏     | 373/902 [1:12:47<28:25,  3.22s/it, acc=0.859, loss=0.733]     

# Test model performance

In [None]:
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_model(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and collect labels and predictions.

    The model is switched to eval mode (and left in it) and gradients are
    disabled for the whole pass. The predicted class of each sample is the
    argmax over dimension 1 of the model output.

    Args:
        model: Network mapping an image batch to per-class scores.
        dataloader: Iterable yielding ``(imgs, labels)`` tensor batches.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        ``(all_labels, all_preds)``: two 1-D NumPy arrays of equal length, in
        dataloader order. Both are empty when the dataloader yields no batches.
    """
    model.eval()
    batch_preds = []
    batch_labels = []

    with torch.no_grad():
        for imgs, labels in dataloader:
            outputs = model(imgs.to(device))

            batch_preds.append(outputs.argmax(dim=1).cpu().numpy())
            # Labels are only collected, never fed to the model, so they do
            # not need to be moved to `device` first.
            batch_labels.append(labels.cpu().numpy())

    # np.concatenate raises on an empty list; return empty arrays instead so
    # an empty split is handled explicitly.
    if not batch_labels:
        return np.empty(0, dtype=np.int64), np.empty(0, dtype=np.int64)

    return np.concatenate(batch_labels), np.concatenate(batch_preds)

In [None]:
# Evaluate on the in-distribution test split: `test_loader` is built from
# pv_test.csv, so the results are labelled "PV" rather than "PlantDoc".
y_true_pv, y_pred_pv = evaluate_model(model, test_loader, device)

print("PV Test Results")
print("Accuracy :", accuracy_score(y_true_pv, y_pred_pv))
# Macro averaging weights every class equally, regardless of its support.
print("Precision:", precision_score(y_true_pv, y_pred_pv, average="macro"))
print("Recall   :", recall_score(y_true_pv, y_pred_pv, average="macro"))
print("F1       :", f1_score(y_true_pv, y_pred_pv, average="macro"))


In [None]:
# Locate the split tables under <repo_root>/data/splits.
repo_root = Path.cwd().parent
splits_dir = repo_root.joinpath("data", "splits")

# Build the PlantDoc test set from the mapped split table and wrap it in an
# ordered (non-shuffled) loader.
plantdoc_test_csv = pd.read_csv(splits_dir / "plantdoc_test_mapped.csv")
plantdoc_test_dataset = PlantDataset(plantdoc_test_csv, repo_root)
plantdoc_test_loader = DataLoader(
    dataset=plantdoc_test_dataset,
    batch_size=32,
    shuffle=False,
)


In [None]:
# Evaluate on the PlantDoc test loader.
# Needs `model`, `device` and `evaluate_model` from earlier cells; running this
# cell in a kernel where `model` has not been created raises NameError.
y_true_pd, y_pred_pd = evaluate_model(model, plantdoc_test_loader, device)

print("PlantDoc Test Results")
print("Accuracy :", accuracy_score(y_true_pd, y_pred_pd))
# The remaining metrics share the same call shape (macro-averaged).
for _metric_label, _metric_fn in (
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1       :", f1_score),
):
    print(_metric_label, _metric_fn(y_true_pd, y_pred_pd, average="macro"))


NameError: name 'model' is not defined