# Traditional Approach Demo Code

This notebook will contain the code necessary to generate the predictions file using preprocessing techniques and model generated from the `develop_trad.ipynb` notebook.

For this, a multiclass SVM is used with DINOv2 as the model used for feature extraction for image preprocessing

## Miscellaneous

In [1]:
# %pip install tensorflow transformers torch torchvision torchaudio scikit-learn opencv-python numpy pickle5 tqdm -q

# To hide warnings produced by different packages
import warnings
warnings.filterwarnings('ignore')

## Imports

In [2]:
import cv2
import cv2.typing as cv_typing
from pathlib import Path
import os
import numpy as np
import tensorflow as tf

from PIL import Image
from tensorflow.keras.preprocessing import image
from transformers import AutoImageProcessor, Dinov2Model
import torch

import pandas as pd
import pickle
from tqdm import tqdm
import typing
import csv

## Constants

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

classes = ["Bacteria", "Fungi", "Healthy", "Pest", "Phytopthora", "Virus"]

# Path of where the train images are located
# img_dir = "/content/drive/MyDrive/BSCS/CS180/Project/potato_test"
img_dir = "../data/potato_test" # if local

# Path for final model
# model_dir = "/content/drive/MyDrive/BSCS/CS180/Project/models"
model_dir = "../models" # if local

# Other constants
ORIG_IMG_SIZE = (1500,1500)
BATCH_SIZE = 8
seed_value = 42

# Set seed
tf.random.set_seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)

<torch._C.Generator at 0x1d3efefe870>

In [4]:
# Dinov2 processor and model
processor = AutoImageProcessor.from_pretrained("facebook/dinov2-large", use_fast=True)
dino_model = Dinov2Model.from_pretrained("facebook/dinov2-large").to(device)
dino_model.eval()

`use_fast` is set to `True` but the image processor class does not have a fast version.  Falling back to the slow version.


Dinov2Model(
  (embeddings): Dinov2Embeddings(
    (patch_embeddings): Dinov2PatchEmbeddings(
      (projection): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))
    )
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder): Dinov2Encoder(
    (layer): ModuleList(
      (0-23): 24 x Dinov2Layer(
        (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
        (attention): Dinov2Attention(
          (attention): Dinov2SelfAttention(
            (query): Linear(in_features=1024, out_features=1024, bias=True)
            (key): Linear(in_features=1024, out_features=1024, bias=True)
            (value): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (output): Dinov2SelfOutput(
            (dense): Linear(in_features=1024, out_features=1024, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
        )
        (layer_scale1): Dinov2LayerScale()
        (drop_path): Identity()
        (norm2): LayerNorm((1024,),

In [5]:
# Load the model
with open(Path(f"{model_dir}/svm_model_final.pkl"), 'rb') as file:
    loaded_model = pickle.load(file)

## Functions

### Loading Data

`load_images` takes the directory where the test images are located and loads them into a program as a list. Also returns the the filenames of the images

In [None]:
def load_images(
    file_path: str = "./potato_test",
    resize_dim: tuple[int, int] = (518, 518),
) -> tuple[list[cv_typing.MatLike], list[str]]:
    # Get folder
    dir = Path(file_path)

    # Check if directory
    if not dir.is_dir():
        raise Exception("Please enter a valid directory")

    # Get all images in the dir
    imgs = [os.path.join(dir, f) for f in os.listdir(dir) if os.path.isfile(os.path.join(dir, f))]

    # Variable for final array
    final_imgs: list[cv_typing.MatLike] = []
    final_filenames: list[str] = []

    try:
        for img_path in imgs:
            img_loaded: Image.Image = image.load_img(img_path, target_size=ORIG_IMG_SIZE)
            img_array: np.ndarray[typing.Any, typing.Any] = image.img_to_array(img_loaded)
            img_array = cv2.resize(img_array, resize_dim)
            final_imgs.append(img_array)

            # Get filename
            filename = img_path.split("\\")[-1]
            # filename = Path(img_path).name

            final_filenames.append(filename)
    except Exception as e:
        print(f"Failed to load images: {e}")
    
    return final_imgs, final_filenames

### Preprocessing through Feature Extraction

The `preprocess_img` takes a list of images and preprocesses them using DINOv2 by taking the features of the image.

In [7]:
def preprocess_img(
    imgs: list[np.ndarray[typing.Any, typing.Any]] = [],   
) -> np.ndarray[typing.Any, typing.Any]:
    all_features = []

    # Split into batches
    for i in tqdm(range(0, len(imgs), BATCH_SIZE)):
        batch = imgs[i:i+BATCH_SIZE]

        # Limit pixel values and convert each image to PIL
        batch = [Image.fromarray(np.clip(img, 0, 255).astype(np.uint8)) for img in batch]

        # Preprocess and move to GPU
        print(f"\tProcesing batch {i}")
        inputs = processor(images=batch, return_tensors="pt").to(device)
        print("\tFinished processing the batch")

        # Forward pass
        with torch.no_grad():
            outputs = dino_model(**inputs)
            features = outputs.pooler_output
            print("\tFeatures taken using  DINOv2")

        all_features.append(features.cpu().numpy())
        print("\tCompleted all batches for  DINOv2\n")

    # Combine all batches into one
    return np.vstack(all_features)

### Get Class Labels

`get_labels` simply converts the numerical labeling produced by the model into the actual class label names (e.g. "Healthy" instead of 2)

In [8]:
def get_labels(
    y: np.ndarray[typing.Any, typing.Any],
    classes: list[str] = ["Bacteria", "Fungi", "Healthy", "Pest", "Phytopthora", "Virus"],
) -> np.ndarray[typing.Any, typing.Any]:
    fxn = lambda x: classes[x]
    applyall = np.vectorize(fxn)
    return applyall(y)

## Running

In [None]:
# Load the images to predict
imgs_to_pred, filenames = load_images(img_dir)

# Preprocess images using DINOv2
processed_img = preprocess_img(imgs_to_pred)

# Make predictions
preds = loaded_model.predict(processed_img)

# Turn into actual labels
final_labels = get_labels(preds)

# Save as csv file
with open('../predictions/group8_svm_predictions.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["image_filename", "predicted_label"])
    writer.writerows(zip(filenames, final_labels))

  0%|          | 0/38 [00:00<?, ?it/s]

	Procesing batch 0
	Finished processing the batch
	Features taken using  DINOv2


  3%|▎         | 1/38 [00:00<00:32,  1.13it/s]

	Completed all batches for  DINOv2

	Procesing batch 8
	Finished processing the batch
	Features taken using  DINOv2


  5%|▌         | 2/38 [00:01<00:18,  1.94it/s]

	Completed all batches for  DINOv2

	Procesing batch 16
	Finished processing the batch
	Features taken using  DINOv2


  8%|▊         | 3/38 [00:01<00:13,  2.56it/s]

	Completed all batches for  DINOv2

	Procesing batch 24
	Finished processing the batch
	Features taken using  DINOv2


 11%|█         | 4/38 [00:01<00:12,  2.78it/s]

	Completed all batches for  DINOv2

	Procesing batch 32
	Finished processing the batch
	Features taken using  DINOv2


 13%|█▎        | 5/38 [00:01<00:10,  3.16it/s]

	Completed all batches for  DINOv2

	Procesing batch 40
	Finished processing the batch
	Features taken using  DINOv2


 16%|█▌        | 6/38 [00:02<00:09,  3.36it/s]

	Completed all batches for  DINOv2

	Procesing batch 48
	Finished processing the batch
	Features taken using  DINOv2


 18%|█▊        | 7/38 [00:02<00:08,  3.54it/s]

	Completed all batches for  DINOv2

	Procesing batch 56
	Finished processing the batch
	Features taken using  DINOv2


 21%|██        | 8/38 [00:02<00:08,  3.63it/s]

	Completed all batches for  DINOv2

	Procesing batch 64
	Finished processing the batch
	Features taken using  DINOv2


 24%|██▎       | 9/38 [00:02<00:07,  3.73it/s]

	Completed all batches for  DINOv2

	Procesing batch 72
	Finished processing the batch
	Features taken using  DINOv2


 26%|██▋       | 10/38 [00:03<00:07,  3.84it/s]

	Completed all batches for  DINOv2

	Procesing batch 80
	Finished processing the batch
	Features taken using  DINOv2


 29%|██▉       | 11/38 [00:03<00:07,  3.72it/s]

	Completed all batches for  DINOv2

	Procesing batch 88
	Finished processing the batch
	Features taken using  DINOv2


 32%|███▏      | 12/38 [00:03<00:06,  3.85it/s]

	Completed all batches for  DINOv2

	Procesing batch 96
	Finished processing the batch
	Features taken using  DINOv2


 34%|███▍      | 13/38 [00:03<00:06,  3.86it/s]

	Completed all batches for  DINOv2

	Procesing batch 104
	Finished processing the batch
	Features taken using  DINOv2


 37%|███▋      | 14/38 [00:04<00:06,  3.94it/s]

	Completed all batches for  DINOv2

	Procesing batch 112
	Finished processing the batch
	Features taken using  DINOv2


 39%|███▉      | 15/38 [00:04<00:05,  3.92it/s]

	Completed all batches for  DINOv2

	Procesing batch 120
	Finished processing the batch
	Features taken using  DINOv2


 42%|████▏     | 16/38 [00:04<00:05,  3.91it/s]

	Completed all batches for  DINOv2

	Procesing batch 128
	Finished processing the batch
	Features taken using  DINOv2


 45%|████▍     | 17/38 [00:04<00:05,  3.96it/s]

	Completed all batches for  DINOv2

	Procesing batch 136
	Finished processing the batch
	Features taken using  DINOv2


 47%|████▋     | 18/38 [00:05<00:05,  3.96it/s]

	Completed all batches for  DINOv2

	Procesing batch 144
	Finished processing the batch
	Features taken using  DINOv2


 50%|█████     | 19/38 [00:05<00:04,  3.88it/s]

	Completed all batches for  DINOv2

	Procesing batch 152
	Finished processing the batch
	Features taken using  DINOv2


 53%|█████▎    | 20/38 [00:05<00:04,  3.95it/s]

	Completed all batches for  DINOv2

	Procesing batch 160
	Finished processing the batch
	Features taken using  DINOv2


 55%|█████▌    | 21/38 [00:06<00:04,  3.85it/s]

	Completed all batches for  DINOv2

	Procesing batch 168
	Finished processing the batch
	Features taken using  DINOv2


 58%|█████▊    | 22/38 [00:06<00:04,  3.84it/s]

	Completed all batches for  DINOv2

	Procesing batch 176
	Finished processing the batch
	Features taken using  DINOv2


 61%|██████    | 23/38 [00:06<00:03,  3.93it/s]

	Completed all batches for  DINOv2

	Procesing batch 184
	Finished processing the batch
	Features taken using  DINOv2


 63%|██████▎   | 24/38 [00:06<00:03,  3.83it/s]

	Completed all batches for  DINOv2

	Procesing batch 192
	Finished processing the batch
	Features taken using  DINOv2


 66%|██████▌   | 25/38 [00:07<00:03,  3.90it/s]

	Completed all batches for  DINOv2

	Procesing batch 200
	Finished processing the batch
	Features taken using  DINOv2


 68%|██████▊   | 26/38 [00:07<00:03,  3.97it/s]

	Completed all batches for  DINOv2

	Procesing batch 208
	Finished processing the batch
	Features taken using  DINOv2


 71%|███████   | 27/38 [00:07<00:02,  3.86it/s]

	Completed all batches for  DINOv2

	Procesing batch 216
	Finished processing the batch
	Features taken using  DINOv2


 74%|███████▎  | 28/38 [00:07<00:02,  3.93it/s]

	Completed all batches for  DINOv2

	Procesing batch 224
	Finished processing the batch
	Features taken using  DINOv2


 76%|███████▋  | 29/38 [00:08<00:02,  3.87it/s]

	Completed all batches for  DINOv2

	Procesing batch 232
	Finished processing the batch
	Features taken using  DINOv2


 79%|███████▉  | 30/38 [00:08<00:02,  3.90it/s]

	Completed all batches for  DINOv2

	Procesing batch 240
	Finished processing the batch
	Features taken using  DINOv2


 82%|████████▏ | 31/38 [00:08<00:01,  3.92it/s]

	Completed all batches for  DINOv2

	Procesing batch 248
	Finished processing the batch
	Features taken using  DINOv2


 84%|████████▍ | 32/38 [00:08<00:01,  3.87it/s]

	Completed all batches for  DINOv2

	Procesing batch 256
	Finished processing the batch
	Features taken using  DINOv2


 87%|████████▋ | 33/38 [00:09<00:01,  3.94it/s]

	Completed all batches for  DINOv2

	Procesing batch 264
	Finished processing the batch
	Features taken using  DINOv2


 89%|████████▉ | 34/38 [00:09<00:01,  3.95it/s]

	Completed all batches for  DINOv2

	Procesing batch 272
	Finished processing the batch
	Features taken using  DINOv2


 92%|█████████▏| 35/38 [00:09<00:00,  3.91it/s]

	Completed all batches for  DINOv2

	Procesing batch 280
	Finished processing the batch
	Features taken using  DINOv2


 95%|█████████▍| 36/38 [00:09<00:00,  3.94it/s]

	Completed all batches for  DINOv2

	Procesing batch 288
	Finished processing the batch
	Features taken using  DINOv2


100%|██████████| 38/38 [00:10<00:00,  3.69it/s]

	Completed all batches for  DINOv2

	Procesing batch 296
	Finished processing the batch
	Features taken using  DINOv2
	Completed all batches for  DINOv2






## Checking

In [10]:
print(len(filenames), len(final_labels))

301 301


In [None]:
df = pd.read_csv("../predictions/group8_svm_predictions.csv")

display(df.head(), df.shape)
display(df['predicted_label'].value_counts())

Unnamed: 0,image_filename,predicted_label
0,0.jpeg,Virus
1,1.jpeg,Healthy
2,10.jpeg,Healthy
3,100.jpeg,Fungi
4,101.jpeg,Fungi


(301, 2)

predicted_label
Fungi          76
Pest           66
Bacteria       57
Virus          57
Phytopthora    28
Healthy        17
Name: count, dtype: int64