# Setup
- Set `MODEL` to select which trained model to use: `tiny`, `tiny_longer`, `small`, or `small_pretrained`.

In [1]:
import sys
import os
from dotenv import load_dotenv
import numpy as np

# Pull environment variables (CODE_ROOT_PATH, TRAINED_MODELS) in via python-dotenv.
load_dotenv()

# Make the project importable. os.getenv returns None when the variable is
# unset; None must never be placed on sys.path. The membership check keeps
# the cell idempotent, so re-running it does not append the same path twice.
code_root = os.getenv('CODE_ROOT_PATH')
if code_root is not None and code_root not in sys.path:
    sys.path.append(code_root) # Add path

from src.models.predict_model import extract_feature_pipeline

# Name of the trained-model sub-directory to work with.
MODEL = 'tiny'

# Fail early with an explicit message instead of the opaque TypeError that
# os.path.join raises when handed None.
trained_models_dir = os.getenv('TRAINED_MODELS')
if trained_models_dir is None:
    raise RuntimeError(
        "Environment variable TRAINED_MODELS is not set; "
        "define it in the environment or in the .env file."
    )

# Where the extracted features are saved / loaded, and the checkpoint to load.
FEATURES_PATH = os.path.join(trained_models_dir, MODEL, 'features.npy')
PRETRAINED_WEIGHTS_PATH = os.path.join(trained_models_dir, MODEL, 'checkpoint.pth')

# Extract Features
- Set `arg_arch` to the architecture that matches `MODEL`: `vit_tiny` or `vit_small`.

In [2]:
# Arguments for the project's feature-extraction pipeline; the checkpoint
# path comes from the Setup cell.
extraction_kwargs = {
    'arg_arch': 'vit_tiny',
    'arg_batch_size': 1024,
    'arg_pretrained_weights': PRETRAINED_WEIGHTS_PATH,
}
features = extract_feature_pipeline(**extraction_kwargs)

# Report the first two axes of the result: number of features and their dimension.
feature_count = features.shape[0]
feature_dim = features.shape[1]
print(f"Generated {feature_count} features of dimension {feature_dim}.")

# Persist the features so later cells can reload them without re-extracting.
np.save(FEATURES_PATH, features)

# Inspect Features
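- **Summary: The feature vectors generated by a given model have similar L2 norms (the mean, standard deviation, and max − min range of the per-vector norms are listed below).**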
## Tiny
- Mean: 32.325293515880766
- Std: 0.4926768531511307
- Range: 4.45210247002208
## Tiny Longer
- Mean: 42.21524291862812
- Std: 0.629201881169106
- Range: 5.481508860327708
## Small
- Mean: 36.92485063952389
- Std: 0.419585597475003
- Range: 3.3206546627951923
## Small Pretrained
- Mean: 36.88864870123054
- Std: 0.482657103533802
- Range: 3.957075444964552

In [3]:
# Reload the saved features from disk, so this cell does not depend on the
# in-memory result of the extraction cell.
features = np.load(FEATURES_PATH)

# L2 norm of every feature vector in one vectorized call instead of a
# Python-level loop over rows.
# Assumes `features` is 2-D (one row per feature vector), which is how the
# extraction cell reports it — TODO confirm for other pipelines.
norms = np.linalg.norm(features, axis=1)

# Mean, standard deviation, and range (max - min) of the norms.
print(np.mean(norms))
print(np.std(norms))
print(np.max(norms)-np.min(norms))

32.325293515880766
0.4926768531511307
4.45210247002208
