# Deep Learning with Transformer Method Demo Code

This notebook contains the code needed to generate the predictions file, using the preprocessing steps and the model produced by the `develop_deep.ipynb` notebook.

The classifier is built on a model named DINOv2, which was trained on the given dataset and is used here to classify the images.

## Miscellaneous

In [4]:
# Uncomment to install the packages this notebook relies on into the kernel's environment
# %pip install tensorflow transformers torch torchvision torchaudio scikit-learn opencv-python numpy pickle5 tqdm -q

# To hide warnings produced by different packages
# NOTE(review): with no category given, this filter ignores every warning category,
# so deprecation warnings are hidden as well — confirm that is intended.
import warnings
warnings.filterwarnings('ignore')

## Imports

In [5]:
import torch
from torch import nn

from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

from copy import deepcopy
import typing
import os
from pathlib import Path
import numpy as np
from PIL import Image

import cv2
import cv2.typing as cv_typing
from tensorflow.keras.preprocessing import image
import csv
import pandas as pd

## Constants

In [6]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Class names; the classifier head below emits one score per entry of this list
classes = ["Bacteria", "Fungi", "Healthy", "Pest", "Phytopthora", "Virus"]

# Path of where the test images (the ones to predict) are located
# img_dir = "/content/drive/MyDrive/DCS/CS180/Project/potato_test"
img_dir = "../data/potato_test" # if local

# Path for final model
# model_dir = "/content/drive/MyDrive/DCS/CS180/Project/models"
model_dir = "../models" # if local

# Other constants
# ORIG_IMG_SIZE: size every image is loaded at by `load_images`
# BATCH_SIZE:    batch size of the DataLoader built by `preprocess_img`
# RESIZE_IMG:    size applied by the torchvision `Resize` transform
ORIG_IMG_SIZE = (1500,1500)
BATCH_SIZE = 8
seed_value = 42
RESIZE_IMG = (420, 420)

# Set seed
torch.manual_seed(seed_value)

<torch._C.Generator at 0x1f8bba0c870>

In [7]:
# Show which device was selected above
device

device(type='cuda')

In [8]:
# Load the vits14 version of dinov2 through torch.hub (repo: facebookresearch/dinov2).
# The classifier class defined below deep-copies this module as its backbone.
dino_model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
# Display the architecture of the loaded backbone
dino_model

Using cache found in C:\Users\Admin/.cache\torch\hub\facebookresearch_dinov2_main


DinoVisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(14, 14), stride=(14, 14))
    (norm): Identity()
  )
  (blocks): ModuleList(
    (0-11): 12 x NestedTensorBlock(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): MemEffAttention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): LayerScale()
      (drop_path1): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU(approximate='none')
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
      (ls2): LayerScale()
      (drop_path2): Identity()
    )
  )
  (n

In [9]:
class DinoVisionTransformerClassifier(nn.Module):
    """DINOv2 backbone followed by a small classification head.

    The backbone is a private deep copy of the module-level ``dino_model`` and
    the head emits one score per entry of the module-level ``classes`` list.
    """

    def __init__(self):
        super().__init__()
        # Copy the backbone so this classifier does not share it with `dino_model`.
        self.transformer = deepcopy(dino_model)
        # Head: dropout -> ReLU -> linear layer from 384 features to the class scores.
        head_layers = [
            nn.Dropout(0.7),
            nn.ReLU(),
            nn.Linear(in_features=384, out_features=len(classes), bias=True),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class scores for the batch ``x``.

        ``x`` is passed through the backbone, the backbone's ``norm`` layer is
        then applied to that output, and the result is fed to the head.
        """
        features = self.transformer(x)
        normalized = self.transformer.norm(features)
        return self.classifier(normalized)

In [10]:
# Load the model
# The checkpoint stores the whole pickled module, so the class defined above must be
# importable under the same name when this cell runs.
model_path = Path(model_dir) / "dino_model_final.pth"

# Allow-listing only affects `weights_only=True` loads; kept so the class stays registered.
torch.serialization.add_safe_globals([DinoVisionTransformerClassifier])

# SECURITY: `weights_only=False` unpickles arbitrary Python objects and can therefore run
# code embedded in the file — only load checkpoints that come from a trusted source.
# torch.load opens the path itself, so no separate file handle is needed.
loaded_model = torch.load(model_path, map_location=device, weights_only=False)
loaded_model = loaded_model.to(device)
# Switch to inference mode (disables dropout); `;` keeps the model repr out of the output.
loaded_model.eval();

## Functions

### Loading Data

`load_images` takes the directory where the test images are located and loads them into the program, returning the list of images together with the matching list of filenames.

In [11]:
def load_images(
    file_path: str = "./potato_test",
    resize_dim: tuple[int, int] = (518, 518),
) -> tuple[list[np.ndarray], list[str]]:
    """Load every file found directly inside a directory as an image array.

    Each file is loaded at ``ORIG_IMG_SIZE``, converted to an array and then
    resized with ``cv2.resize`` to ``resize_dim``. A file that cannot be loaded
    is reported and skipped, so one bad file does not discard the others.

    Args:
        file_path: Directory containing the images to load.
        resize_dim: Target size handed to ``cv2.resize`` for every image.

    Returns:
        A ``(images, filenames)`` tuple of two equally long lists, where
        ``filenames[i]`` is the name of the file ``images[i]`` was read from.
        Files are processed in sorted path order.

    Raises:
        NotADirectoryError: If ``file_path`` is not an existing directory.
    """
    folder = Path(file_path)

    # Check if directory
    if not folder.is_dir():
        raise NotADirectoryError("Please enter a valid directory")

    # Directory listing order is arbitrary; sorting keeps the output reproducible.
    img_paths = sorted(entry for entry in folder.iterdir() if entry.is_file())

    final_imgs: list[np.ndarray] = []
    final_filenames: list[str] = []

    for img_path in img_paths:
        try:
            img_loaded = image.load_img(str(img_path), target_size=ORIG_IMG_SIZE)
            img_array = image.img_to_array(img_loaded)
            img_array = cv2.resize(img_array, resize_dim)
        except Exception as e:
            # Skip only the offending file instead of aborting the whole load.
            print(f"Failed to load image {img_path.name}: {e}")
            continue

        # Append both together so the two lists always stay aligned.
        final_imgs.append(img_array)
        final_filenames.append(img_path.name)

    return final_imgs, final_filenames

### Preprocessing

`preprocess_img` takes a list of images, wraps them in a dataset that resizes each image and converts it to a tensor, and returns a `DataLoader` for the model to process.

In [12]:
# Transform applied to every image by the dataset below: resize to RESIZE_IMG, then
# convert the PIL image to a tensor. Despite the name, this notebook uses it for
# inference (it is the transform passed in `preprocess_img`).
train_transform = transforms.Compose([
    transforms.Resize(RESIZE_IMG),
    transforms.ToTensor(),
])

class PotatoLeafDisease(Dataset):
    """Dataset serving in-memory images, optionally run through a transform.

    There are no labels: indexing yields only the (transformed) image.
    """

    def __init__(self, imgs: list[np.ndarray], transforms: transforms.Compose):
        # Keep references to the transform pipeline and the raw image arrays.
        self.transforms = transforms
        self.imgs = imgs

    def __len__(self) -> int:
        """Number of images held by the dataset."""
        return len(self.imgs)

    def __getitem__(self, idx: int) -> torch.Tensor:
        """Return image ``idx`` as a PIL image passed through the transform.

        When the stored transform is falsy, the untransformed PIL image is returned.
        """
        # Cast to 8-bit before handing the array to PIL.
        pil_img = Image.fromarray(self.imgs[idx].astype(np.uint8))
        return self.transforms(pil_img) if self.transforms else pil_img

In [13]:
def preprocess_img(
    imgs: typing.Optional[list[np.ndarray[typing.Any, typing.Any]]] = None,
) -> DataLoader:
    """Wrap a list of images in a ``DataLoader`` ready for the model.

    Args:
        imgs: Image arrays to serve. ``None`` (the default) means no images.

    Returns:
        A ``DataLoader`` over a ``PotatoLeafDisease`` dataset that applies
        ``train_transform``, yields batches of ``BATCH_SIZE`` and keeps the
        input order (``shuffle=False``), so predictions line up with the
        order of ``imgs``.
    """
    # The dataset keeps a reference to the list, so use a fresh one per call rather
    # than a single default list shared between every call.
    if imgs is None:
        imgs = []

    leaves_data = PotatoLeafDisease(imgs, transforms=train_transform)
    return DataLoader(leaves_data, batch_size=BATCH_SIZE, shuffle=False)

### Get Class Labels

`get_labels` simply converts the numerical labeling produced by the model into the actual class label names (e.g. "Healthy" instead of 2)

In [14]:
def get_labels(
    y: np.ndarray[typing.Any, typing.Any],
    classes: list[str] = ["Bacteria", "Fungi", "Healthy", "Pest", "Phytopthora", "Virus"],
) -> np.ndarray[typing.Any, typing.Any]:
    """Map numeric class indices to their class names.

    Args:
        y: Integer class indices (array or sequence), e.g. the argmax of the
            model output. May be empty.
        classes: Class names; ``classes[i]`` is the name for index ``i``.

    Returns:
        An array of strings with the same shape as ``y``.

    Raises:
        IndexError: If an index is out of range for ``classes`` or ``y`` is
            non-empty and not of an integer type.
    """
    label_names = np.asarray(classes, dtype=str)
    indices = np.asarray(y)

    # An empty input has no integer dtype to index with (and np.vectorize rejects
    # size-0 input), so return an empty string array of the matching shape.
    if indices.size == 0:
        return np.empty(indices.shape, dtype=label_names.dtype)

    # Index the whole array at once instead of calling a Python function per element;
    # np.asarray keeps the result an ndarray even for a scalar index.
    return np.asarray(label_names[indices])

## Running

In [None]:
# Load the images to predict
imgs_to_pred, filenames = load_images(img_dir)
dataloader = preprocess_img(imgs_to_pred)

# Make predictions (no gradients are needed for inference)
all_preds = []
with torch.no_grad():
    for batch in dataloader:
        batch = batch.to(device)
        preds = loaded_model(batch)
        # Index of the highest score per image
        predicted_classes = torch.argmax(preds, dim=1)
        all_preds.extend(predicted_classes.cpu().numpy())

# Turn into actual labels
final_labels = get_labels(all_preds)

# zip() below would silently drop rows on a length mismatch, so fail loudly instead
if len(filenames) != len(final_labels):
    raise RuntimeError(
        f"Got {len(final_labels)} predictions for {len(filenames)} images"
    )

# Save as csv file, creating the output folder first if it does not exist yet
predictions_path = Path('../predictions/group8_dino_predictions.csv')
predictions_path.parent.mkdir(parents=True, exist_ok=True)
with open(predictions_path, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["image_filename", "predicted_label"])
    writer.writerows(zip(filenames, final_labels))

## Checking

In [16]:
# Both counts should match: one predicted label per loaded image file
print(len(filenames), len(final_labels))

301 301


In [None]:
# Read the predictions file back from disk to confirm it was written as expected
df = pd.read_csv("../predictions/group8_dino_predictions.csv")

# First rows and overall shape, then how many images were assigned to each class
display(df.head(), df.shape)
display(df['predicted_label'].value_counts())

Unnamed: 0,image_filename,predicted_label
0,0.jpeg,Pest
1,1.jpeg,Healthy
2,10.jpeg,Healthy
3,100.jpeg,Fungi
4,101.jpeg,Fungi


(301, 2)

predicted_label
Fungi          76
Pest           67
Bacteria       56
Virus          48
Phytopthora    33
Healthy        21
Name: count, dtype: int64