# Finally sorting the dataset

In [1]:
import os
import fnmatch

def find_images(directory):
    image_files = []
    for root, dirnames, filenames in os.walk(directory):
        for filename in filenames:
            if fnmatch.fnmatch(filename, '*.png') or fnmatch.fnmatch(filename, '*.jpg') or fnmatch.fnmatch(filename, '*.jpeg'):
                image_files.append(os.path.join(root, filename))
    return image_files

# image_list = find_images('/work/2023_annot/images')

In [2]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import v2
from PIL import Image, ImageDraw
from tqdm import tqdm
import random
import matplotlib.pyplot as plt
import numpy as np
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.encoders import get_preprocessing_fn
import torch.nn as nn
print("before creating UNET")
preprocess_input = get_preprocessing_fn('resnet34', pretrained='imagenet')
model = smp.Unet('resnet34', encoder_weights='imagenet', in_channels=3, classes=4, activation=None)
print("before loasding UNET")
model.load_state_dict(torch.load('/work/multiclass_multiyear_UNET_sm.pth', map_location=device,weights_only=True))
model = model.to(device)
print("after loading UNET")
classifier = torch.load('/work/best_classifier.pth')

Using device: cuda
before creating UNET
before loasding UNET
after loading UNET


  classifier = torch.load('/work/best_classifier.pth')


In [3]:
from PadSquare import PadSquare
import json
def predict_filepaths(
    job_dir,
    model_name,
    classifier_name, 
    model,           # Your trained UNET (or similar) model
    best_classifier, # Your trained logistic regression classifier
    device
):
    file_paths = find_images(job_dir)
    model.eval()
    
    transform = transforms.v2.Compose([
        PadSquare(0), 
        v2.Resize((512, 512)), 
        v2.ToImage()
        ])
    if job_dir.endswith('/'):
        job_dir = job_dir[:-1]
    json_file = f"{job_dir.split('/')[-1]}_{model_name}_{classifier_name}_{device}.json"
    
    if os.path.exists(json_file):
        with open(json_file, 'r') as f:
            data = json.load(f)
    else:
        data = {}
    for index, path in enumerate(file_paths):
        if path in data:
            print(f"Skipping {path}")
            continue
        else:    
            print(f"Processing {path}")
            # 1) Load image
            try:
                image = Image.open(path).convert('RGB')
            except:
                print(f"Could not open {path}")
                continue                
            # 2) Apply transforms
            img_t = transform(image)
            img_pp = preprocess_input(img_t.permute(1, 2, 0).numpy()) 
            print(f"img_pp shape: {img_pp.shape}")
            img_pp = torch.from_numpy(img_pp).permute(2, 0, 1).float().unsqueeze(0)
            with torch.no_grad():
                # Extract features
                feats = model.encoder(img_pp.to(device))[-1]  # shape [B, C, H, W]
                feats = feats.view(feats.size(0), feats.size(1), -1).mean(dim=-1).cpu().numpy()
                
                # 3) Make classifier prediction
                pred = best_classifier.predict(feats)[0]
                data[path] = int(pred)
                
                print(f"index: {index} Predicted {path} as {pred}")    

        if index % 100 == 0:
            with open(json_file, 'w') as f:
                json.dump(data, f)


In [None]:
predict_filepaths(
    "/media/jcristia/Brace_Root_/2021_brobot_phenotyping/Lodging Hybrids/",
    "UNET",
    "logistic_regression", 
    model,         
    classifier,
    device
)

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



Could not open /media/jcristia/Brace_Root_/2021_brobot_phenotyping/Mutant/20210810_124656/._0000001.jpeg
Skipping /media/jcristia/Brace_Root_/2021_brobot_phenotyping/Mutant/20210810_124656/0000061.jpeg
Skipping /media/jcristia/Brace_Root_/2021_brobot_phenotyping/Mutant/20210810_124656/0000000.jpeg
Skipping /media/jcristia/Brace_Root_/2021_brobot_phenotyping/Mutant/20210810_124656/0000063.jpeg
Skipping /media/jcristia/Brace_Root_/2021_brobot_phenotyping/Mutant/20210810_124656/0000001.jpeg
Skipping /media/jcristia/Brace_Root_/2021_brobot_phenotyping/Mutant/20210810_124656/0000062.jpeg
Skipping /media/jcristia/Brace_Root_/2021_brobot_phenotyping/Mutant/20210810_124656/0000002.jpeg
Skipping /media/jcristia/Brace_Root_/2021_brobot_phenotyping/Mutant/20210810_124656/0000064.jpeg
Skipping /media/jcristia/Brace_Root_/2021_brobot_phenotyping/Mutant/20210810_124656/0000003.jpeg
Skipping /media/jcristia/Brace_Root_/2021_brobot_phenotyping/Mutant/20210810_124656/0000065.jpeg
Skipping /media/jcrist