# CCT model

## Overview

* Uses a pretrained model from this repo: https://github.com/SHI-Labs/Compact-Transformers 
* The main CCT Python file and its utils were copied into this project because the upstream code is not published as a package that can simply be imported 
* Currently using the CCT model with ImageNet-pretrained weights

## Model choice
* cct_14_7x2_224
* cct_14_7x2_384

In [4]:
%pip install timm torchvision pandas pillow


Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting timm
  Downloading timm-1.0.24-py3-none-any.whl.metadata (38 kB)
Collecting huggingface_hub (from timm)
  Downloading huggingface_hub-1.4.1-py3-none-any.whl.metadata (13 kB)
Collecting safetensors (from timm)
  Downloading safetensors-0.7.0-cp38-abi3-win_amd64.whl.metadata (4.2 kB)
Collecting hf-xet<2.0.0,>=1.2.0 (from huggingface_hub->timm)
  Downloading hf_xet-1.2.0-cp37-abi3-win_amd64.whl.metadata (5.0 kB)
Collecting httpx<1,>=0.23.0 (from huggingface_hub->timm)
  Downloading httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting shellingham (from huggingface_hub->timm)
  Downloading shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting typer-slim (from huggingface_hub->timm)
  Downloading typer_slim-0.21.1-py3-none-any.whl.metadata (16 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->huggingface_hub->timm)
  Downloading httpcore-1.0.9-py3-none-any.whl.metadata (21 kB)
Downloadi


[notice] A new release of pip is available: 26.0 -> 26.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import numpy as np
from PIL import Image
from pathlib import Path
from torchvision import transforms

# Load dataset

In [6]:
class PlantDataset(Dataset):
    """Image-classification dataset driven by a split table.

    Every row of ``df`` must provide ``filepath_rel`` (image path relative to
    ``repo_root``) and ``canonical_id`` (class label, converted with ``int``).

    Args:
        df: Split table. Its index is reset so positional lookups line up
            with the ``idx`` passed to ``__getitem__``.
        repo_root: Directory that ``filepath_rel`` entries are resolved against.
        img_size: Side length of the square resize in the default transform.
        transform: Optional callable applied to the RGB PIL image. When it is
            ``None`` a resize -> tensor -> normalize pipeline is built.
    """

    def __init__(self, df, repo_root, img_size=224, transform=None):
        self.df = df.reset_index(drop=True)
        self.repo_root = Path(repo_root)

        if transform is None:
            # Default preprocessing; the mean/std values are the usual
            # ImageNet channel statistics.
            self.transform = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]
                )
            ])
        else:
            self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the split."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(transformed_image, label)``.

        Raises:
            FileNotFoundError: If the image referenced by the row is missing.
        """
        row = self.df.iloc[idx]

        img_path = self.repo_root / row["filepath_rel"]
        if not img_path.exists():
            raise FileNotFoundError(f"Image not found: {img_path}")

        # Image.open is lazy and keeps the file open; the context manager
        # releases the handle as soon as the pixels have been converted,
        # instead of waiting for garbage collection.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")
        img = self.transform(img)

        label = int(row["canonical_id"])
        return img, label


In [7]:
# The notebook lives one level below the repository root.
repo_root = Path.cwd().parent
splits_dir = repo_root.joinpath("data", "splits")

# Read the three pv_* split tables in train / val / test order.
train_csv, val_csv, test_csv = (
    pd.read_csv(splits_dir / f"pv_{split}.csv") for split in ("train", "val", "test")
)

# Wrap each table in a PlantDataset using the default preprocessing.
train_dataset, val_dataset, test_dataset = (
    PlantDataset(frame, repo_root) for frame in (train_csv, val_csv, test_csv)
)

# Only the training loader shuffles; evaluation loaders keep file order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Quick sanity check: pull a single training batch and inspect it.
batch_iter = iter(train_loader)
imgs, labels = next(batch_iter)
print(imgs.shape)
print(labels[:10])

torch.Size([32, 3, 224, 224])
tensor([ 2, 25, 14, 25, 14,  1, 22, 25, 18, 25])


# Set up model

In [None]:
import ssl

# SECURITY NOTE(review): this disables TLS certificate verification for every
# HTTPS request that uses the default context in this kernel. It is kept as a
# workaround (presumably for the pretrained-weight download - confirm); prefer
# fixing the local CA bundle and deleting this line.
ssl._create_default_https_context = ssl._create_unverified_context

# The former `import optim` was dropped: `torch.optim` is already imported as
# `optim` in the imports cell, and a top-level `optim` module would either fail
# to import or shadow that alias.


In [None]:
from cct.cct import cct_14_7x2_224

# Build the 224px CCT variant with pretrained weights and a 26-class output.
model = cct_14_7x2_224(num_classes=26, pretrained=True, progress=True)

# List every parameter name so the layer to fine-tune can be identified.
for param_name, _param in model.named_parameters():
    print(param_name)

# Count the parameter tensors that currently receive gradients.
trainable_params = list(filter(lambda p: p.requires_grad, model.parameters()))
print("Trainable parameters:", len(trainable_params))



Removing classifier.fc.weight, number of classes has changed.
Removing classifier.fc.bias, number of classes has changed.


tokenizer.conv_layers.0.0.weight
tokenizer.conv_layers.1.0.weight
classifier.positional_emb
classifier.attention_pool.weight
classifier.attention_pool.bias
classifier.blocks.0.pre_norm.weight
classifier.blocks.0.pre_norm.bias
classifier.blocks.0.self_attn.qkv.weight
classifier.blocks.0.self_attn.proj.weight
classifier.blocks.0.self_attn.proj.bias
classifier.blocks.0.linear1.weight
classifier.blocks.0.linear1.bias
classifier.blocks.0.norm1.weight
classifier.blocks.0.norm1.bias
classifier.blocks.0.linear2.weight
classifier.blocks.0.linear2.bias
classifier.blocks.1.pre_norm.weight
classifier.blocks.1.pre_norm.bias
classifier.blocks.1.self_attn.qkv.weight
classifier.blocks.1.self_attn.proj.weight
classifier.blocks.1.self_attn.proj.bias
classifier.blocks.1.linear1.weight
classifier.blocks.1.linear1.bias
classifier.blocks.1.norm1.weight
classifier.blocks.1.norm1.bias
classifier.blocks.1.linear2.weight
classifier.blocks.1.linear2.bias
classifier.blocks.2.pre_norm.weight
classifier.blocks.2.pr

In [None]:
# Freeze everything except the classification head: a parameter stays
# trainable only when its name contains "classifier.fc".
for param_name, tensor in model.named_parameters():
    tensor.requires_grad = "classifier.fc" in param_name

trainable_params = [weight for weight in model.parameters() if weight.requires_grad]

print("Trainable parameters:", len(trainable_params))

# Cross-entropy loss, with Adam updating just the unfrozen tensors.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(trainable_params, lr=1e-4)

Trainable parameters: 2


In [None]:
EPOCHS = 10
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)


def _run_epoch(model, loader, criterion, device, desc, tag, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the model is switched to train mode and
    updated after every batch. Without it the model is switched to eval mode
    and gradient tracking is disabled.

    Args:
        model: Network to run; expected to already live on ``device``.
        loader: Iterable yielding ``(imgs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.
        desc: Text shown on the tqdm progress bar.
        tag: Prefix for the progress-bar metrics (``"train"`` or ``"val"``).
        optimizer: Optimizer to step; ``None`` means evaluation only.

    Raises:
        ValueError: If ``loader`` yields no samples, so no average exists.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is the same as eval()

    loss_sum = 0.0
    correct = 0
    total = 0

    loop = tqdm(loader, desc=desc, leave=False)
    with torch.set_grad_enabled(training):
        for imgs, labels in loop:
            imgs, labels = imgs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # Weight the batch loss by batch size so a shorter final batch
            # does not skew the epoch average (assumes mean reduction, the
            # CrossEntropyLoss default).
            loss_sum += loss.item() * imgs.size(0)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)

            loop.set_postfix(**{
                f"{tag}_loss": loss_sum / total,
                f"{tag}_acc": correct / total,
            })

    if total == 0:
        raise ValueError(f"{tag} loader yielded no samples; cannot compute metrics")

    return loss_sum / total, correct / total


for epoch in range(EPOCHS):
    # TRAIN
    train_loss, train_acc = _run_epoch(
        model, train_loader, criterion, device,
        desc=f"Epoch {epoch+1}/{EPOCHS} [Train]", tag="train", optimizer=optimizer,
    )

    # VALIDATION
    val_loss, val_acc = _run_epoch(
        model, val_loader, criterion, device,
        desc=f"Epoch {epoch+1}/{EPOCHS} [Val]", tag="val",
    )

    print(f"Epoch [{epoch+1}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")



                                                                                                       

Epoch [1/10] Train Loss: 1.5272, Train Acc: 0.6497, Val Loss: 0.7991, Val Acc: 0.8339


                                                                                                        

Epoch [2/10] Train Loss: 0.6507, Train Acc: 0.8735, Val Loss: 0.4743, Val Acc: 0.9004


                                                                                                        

Epoch [3/10] Train Loss: 0.4421, Train Acc: 0.9086, Val Loss: 0.3493, Val Acc: 0.9262


                                                                                                        

Epoch [4/10] Train Loss: 0.3428, Train Acc: 0.9262, Val Loss: 0.2811, Val Acc: 0.9404


                                                                                                        

Epoch [5/10] Train Loss: 0.2812, Train Acc: 0.9385, Val Loss: 0.2395, Val Acc: 0.9443


                                                                                                        

Epoch [6/10] Train Loss: 0.2444, Train Acc: 0.9450, Val Loss: 0.2089, Val Acc: 0.9520


                                                                                                        

Epoch [7/10] Train Loss: 0.2156, Train Acc: 0.9519, Val Loss: 0.1869, Val Acc: 0.9551


                                                                                                        

Epoch [8/10] Train Loss: 0.1939, Train Acc: 0.9554, Val Loss: 0.1722, Val Acc: 0.9573


                                                                                                        

Epoch [9/10] Train Loss: 0.1775, Train Acc: 0.9579, Val Loss: 0.1579, Val Acc: 0.9612


                                                                                                         

Epoch [10/10] Train Loss: 0.1652, Train Acc: 0.9608, Val Loss: 0.1467, Val Acc: 0.9639




# Test model performance

In [None]:
import torch
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_model(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and collect labels and predictions.

    The model is switched to eval mode (and left there) and inference runs
    without gradient tracking. The predicted class is the argmax over
    dimension 1 of the model output.

    Args:
        model: Network to evaluate; expected to already live on ``device``.
        dataloader: Iterable yielding ``(imgs, labels)`` tensor batches.
        device: Device the image batches are moved to.

    Returns:
        Tuple ``(all_labels, all_preds)`` of 1-D NumPy arrays in dataloader
        order. Both are empty ``int64`` arrays when the dataloader yields no
        batches.
    """
    model.eval()
    pred_batches = []
    label_batches = []

    with torch.no_grad():
        for imgs, labels in dataloader:
            outputs = model(imgs.to(device))
            pred_batches.append(outputs.argmax(dim=1).cpu().numpy())
            # Labels are only needed on the host, so skip the device round-trip.
            label_batches.append(labels.cpu().numpy())

    # np.concatenate raises on an empty list; return empty results instead.
    if not pred_batches:
        empty = np.empty(0, dtype=np.int64)
        return empty, empty.copy()

    return np.concatenate(label_batches), np.concatenate(pred_batches)

In [None]:
# Evaluate on the held-out pv_test split (test_loader wraps pv_test.csv).
y_true_pd, y_pred_pd = evaluate_model(model, test_loader, device)

# This cell scores the pv_* test split, so the header must not say "PlantDoc";
# the PlantDoc numbers come from the separate plantdoc_test_loader evaluation.
print("PV Test Results")
print("Accuracy :", accuracy_score(y_true_pd, y_pred_pd))
print("Precision:", precision_score(y_true_pd, y_pred_pd, average="macro"))
print("Recall   :", recall_score(y_true_pd, y_pred_pd, average="macro"))
print("F1       :", f1_score(y_true_pd, y_pred_pd, average="macro"))


In [9]:
# Locate the split tables relative to the repository root.
repo_root = Path.cwd().parent
splits_dir = repo_root.joinpath("data", "splits")

# PlantDoc test table -> dataset -> fixed-order loader.
plantdoc_test_csv = pd.read_csv(splits_dir.joinpath("plantdoc_test_mapped.csv"))
plantdoc_test_dataset = PlantDataset(plantdoc_test_csv, repo_root)
plantdoc_test_loader = DataLoader(
    plantdoc_test_dataset,
    shuffle=False,
    batch_size=32,
)


In [11]:
# Needs `model`, `device` and `evaluate_model` from the earlier cells, so run
# the notebook top to bottom before executing this cell.
y_true_pd, y_pred_pd = evaluate_model(model, plantdoc_test_loader, device)

# Macro-averaged metrics, paired with the label text to print.
macro_metrics = (
    ("Precision:", precision_score),
    ("Recall   :", recall_score),
    ("F1       :", f1_score),
)

print("PlantDoc Test Results")
print("Accuracy :", accuracy_score(y_true_pd, y_pred_pd))
for label_text, metric_fn in macro_metrics:
    print(label_text, metric_fn(y_true_pd, y_pred_pd, average="macro"))


NameError: name 'model' is not defined