In [None]:
import timm
import torch
from torch.utils.data import DataLoader 
import numpy as np
from torchvision import transforms
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from training_utils import *
import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
def extract_features(model, dataloader, device):
    features = []
    labels = []

    with torch.no_grad():
        for images, targets in dataloader:
            images = images.to(device)
            out = model(images)
            features.append(out.cpu().numpy())
            labels.append(targets.numpy())

    features = np.concatenate(features, axis=0)
    labels = np.concatenate(labels, axis=0)
    return features, labels

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

minority_augmentation = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.ToTensor()
])
label_map = {
    'badger': 0,
    'bird': 1,
    'boar': 2,
    'butterfly': 3,
    'cat': 4,
    'dog': 5,
    'fox': 6,
    'lizard': 7,
    'podolic_cow': 8,
    'porcupine': 9,
    'weasel': 10,
    'wolf': 11
}

In [None]:
# Dataset and DataLoader
train_df = pd.read_csv("data/train.csv")
augmented_df = augment_minority_classes(train_df, "data/labeled_img", "data/labeled_img_aug", minority_augmentation, min_samples=50)
train_ds = AnimalDataset(augmented_df, "data/labeled_img/", transform=transform, label_map=label_map, crop_bbox=True)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)

val_ds = AnimalDataset("data/val.csv", "data/labeled_img/", transform=transform, label_map=label_map, crop_bbox=True)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False)

test_ds = AnimalDataset("data/test.csv", "data/labeled_img/", transform=transform, label_map=label_map, crop_bbox=False)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

In [None]:
# Load ViT pre-trained model
model_feat = timm.create_model('vit_base_patch16_224', pretrained=True)
model_feat.reset_classifier(0)  # head removal for classification

model_feat.eval()
model_feat = model_feat.to(device)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=False)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False)

In [6]:
X_train, y_train = extract_features(model_feat, train_loader, device)
X_val, y_val = extract_features(model_feat, val_loader, device)

X_test, y_test = extract_features(model_feat, test_loader, device)

In [None]:
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_val)
y_test_pred = clf.predict(X_test)
acc = accuracy_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred, average='macro')
recall = recall_score(y_test, y_test_pred, average='macro')
precision = precision_score(y_test, y_test_pred, average='macro')
cm = confusion_matrix(y_test, y_test_pred)
print("---------- LogisticRegression ----------")
print(f"Validation Recall: {recall:.7f}")
print(f"Validation Precision: {precision:.7f}")
print(f"Validation F1 Score: {f1:.7f}")
print(f"Validation Accuracy: {acc:.7f}")

---------- LogisticRegression ----------
Validation Recall: 0.7206547
Validation Precision: 0.7235740
Validation F1 Score: 0.7108709
Validation Accuracy: 0.8712871


In [9]:
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(X_train, y_train)

y_pred = decision_tree.predict(X_val)
y_test_pred = decision_tree.predict(X_test)
acc = accuracy_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred, average='macro')
recall = recall_score(y_test, y_test_pred, average='macro')
precision = precision_score(y_test, y_test_pred, average='macro')
cm = confusion_matrix(y_test, y_test_pred)
print("---------- DecisionTreeClassifier ----------")
print(f"Validation Recall: {recall:.7f}")
print(f"Validation Precision: {precision:.7f}")
print(f"Validation F1 Score: {f1:.7f}")
print(f"Validation Accuracy: {acc:.7f}")

---------- DecisionTreeClassifier ----------
Validation Recall: 0.2496562
Validation Precision: 0.3122971
Validation F1 Score: 0.2661439
Validation Accuracy: 0.6633663


In [None]:
knn  = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
c
y_pred = knn.predict(X_val)
y_test_pred = knn.predict(X_test)
acc = accuracy_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred, average='macro')
recall = recall_score(y_test, y_test_pred, average='macro')
precision = precision_score(y_test, y_test_pred, average='macro')
cm = confusion_matrix(y_test, y_test_pred)
print("---------- KNeighborsClassifier k = 1 ----------")
print(f"Validation Recall: {recall:.7f}")
print(f"Validation Precision: {precision:.7f}")
print(f"Validation F1 Score: {f1:.7f}")
print(f"Validation Accuracy: {acc:.7f}")

---------- KNeighborsClassifier k = 1 ----------
Validation Recall: 0.8258560
Validation Precision: 0.8161111
Validation F1 Score: 0.8115815
Validation Accuracy: 0.8712871
