# Traditional Approach Demo Code

This notebook will contain the code necessary to generate the predictions file using preprocessing techniques and model generated from the `develop_trad.ipynb` notebook.

For this, a multiclass SVM is used with DINOv2 as the model used for feature extraction for image preprocessing

## Miscellaneous

In [None]:
%pip install tensorflow transformers torch torchvision torchaudio scikit-learn opencv-python numpy pickle5 tqdm -q

# To hide warnings produced by different packages
import warnings
warnings.filterwarnings('ignore')

## Imports

In [None]:
import cv2
import cv2.typing as cv_typing
from pathlib import Path
import os
import numpy as np
import tensorflow as tf

from PIL import Image
from tensorflow.keras.preprocessing import image
from transformers import AutoImageProcessor, Dinov2Model
import torch

import pickle
from tqdm import tqdm
import typing

## Constants

In [36]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

classes = ["Bacteria", "Fungi", "Healthy", "Pest", "Phytopthora", "Virus"]

# Path of where the train images are located
# img_dir = "/content/drive/MyDrive/BSCS/CS180/Project/potato_test"
img_dir = "../data/potato_test" # if local

# Path for final model
# model_dir = "/content/drive/MyDrive/BSCS/CS180/Project/models"
model_dir = "../models" # if local

# Other constants
ORIG_IMG_SIZE = (1500,1500)
BATCH_SIZE = 8
seed_value = 42

# Set seed
tf.random.set_seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)

<torch._C.Generator at 0x19d1c8aa8f0>

In [4]:
# Dinov2 processor and model
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-large", use_fast=True)
dino_model = Dinov2Model.from_pretrained("facebook/dinov2-large").to(device)
dino_model.eval()

`use_fast` is set to `True` but the image processor class does not have a fast version.  Falling back to the slow version.


Dinov2Model(
  (embeddings): Dinov2Embeddings(
    (patch_embeddings): Dinov2PatchEmbeddings(
      (projection): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): Dinov2Encoder(
    (layer): ModuleList(
      (0-23): 24 x Dinov2Layer(
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (attention): Dinov2Attention(
          (attention): Dinov2SelfAttention(
            (query): Linear(in_features=1024, out_features=1024, bias=True)
            (key): Linear(in_features=1024, out_features=1024, bias=True)
            (value): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (output): Dinov2SelfOutput(
            (dense): Linear(in_features=1024, out_features=1024, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (layer_scale1): Dinov2LayerScale()
        (drop_path): Identity()
        (norm2): LayerNorm((1024,),

In [37]:
# Load the model
with open(Path(f"{model_dir}/svm_model_final.pkl"), 'rb') as file:
    loaded_model = pickle.load(file)

## Functions

### Loading Data

`load_images` takes the directory where the test images are located and loads them into a program as a list

In [21]:
def load_images(
    file_path: str = "./potato_test",
    resize_dim: tuple[int, int] = (518, 518),
) -> list[cv_typing.MatLike]:
    # Get folder
    dir = Path(file_path)

    # Check if directory
    if not dir.is_dir():
        raise Exception("Please enter a valid directory")

    # Get all images in the dir
    imgs = [os.path.join(dir, f) for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]

    # Variable for final array
    final_imgs: list[cv_typing.MatLike] = []

    try:
        for img_path in imgs:
            img_loaded: Image.Image = image.load_img(img_path, target_size=ORIG_IMG_SIZE)
            img_array: np.ndarray[typing.Any, typing.Any] = image.img_to_array(img_loaded)
            img_array = cv2.resize(img_array, resize_dim)
            final_imgs.append(img_array)
    except Exception as e:
        print(f"Failed to load images: {e}")
    
    return final_imgs

### Preprocessing through Feature Extraction

The `preprocess_img` takes a list of images and preprocesses them using DINOv2 by taking the features of the image.

In [26]:
def preprocess_img(
    imgs: list[np.ndarray[typing.Any, typing.Any]] = [],   
) -> np.ndarray[typing.Any, typing.Any]:
    all_features = []

    # Split into batches
    for i in tqdm(range(0, len(imgs), BATCH_SIZE)):
        batch = imgs[i:i+BATCH_SIZE]

        # Limit pixel values and convert each image to PIL
        batch = [Image.fromarray(np.clip(img, 0, 255).astype(np.uint8)) for img in batch]

        # Preprocess and move to GPU
        print(f"\tProcesing batch {i}")
        inputs = processor(images=batch, return_tensors="pt").to(device)
        print("\tFinished processing the batch")

        # Forward pass
        with torch.no_grad():
            outputs = dino_model(**inputs)
            features = outputs.pooler_output
            print("\tFeatures taken using  DINOv2")

        all_features.append(features.cpu().numpy())
        print("\tCompleted all batches for  DINOv2\n")

    # Combine all batches into one
    return np.vstack(all_features)

### Get Class Labels

`get_labels` simply converts the numerical labeling produced by the model into the actual class label names (e.g. "Healthy" instead of 2)

In [39]:
def get_labels(
    y: np.ndarray[typing.Any, typing.Any],
    classes: list[str] = ["Bacteria", "Fungi", "Healthy", "Pest", "Phytopthora", "Virus"],
) -> np.ndarray[typing.Any, typing.Any]:
    fxn = lambda x: classes[x]
    applyall = np.vectorize(fxn)
    return applyall(y)

## Running

In [None]:
# Load the images to predict
imgs_to_pred = load_images(img_dir)

# Preprocess images using DINOv2
processed_img = preprocess_img(imgs_to_pred)

# Make predictions
preds = loaded_model.predict(processed_img)

# Turn into actual labels
final_labels = get_labels(preds)

# Save as text file
with open(Path("../predictions/pred_trad.txt"), "w") as file:
    file.write("\n".join(list(final_labels)))

  0%|          | 0/1 [00:00<?, ?it/s]

	Procesing batch 0
	Finished processing the batch


100%|██████████| 1/1 [00:00<00:00,  1.97it/s]

	Features taken using  DINOv2
	Completed all batches for  DINOv2




