In [None]:
#| include: false
import timm
from fastai.vision.all import *
from fasterai.quantize.quantizer import *

In [None]:
# Resolve the local copy of the dataset referenced by URLs.PETS (fastai's
# untar_data), then collect every image file under its "images" sub-folder.
path = untar_data(URLs.PETS)
files = get_image_files(path/"images")

def label_func(f):
    """Label a file name: True when its first character is uppercase.

    `f` is indexed like a string (`f[0]`), so it must be non-empty.
    NOTE(review): in this dataset an uppercase first letter presumably marks
    one of the two classes (cat vs. dog) - confirm against the dataset docs.
    """
    first_char = f[0]
    return first_char.isupper()

# Build the DataLoaders: every file is labelled by `label_func` and each item
# is passed through Resize(64).
# NOTE(review): no seed is passed here, so the train/validation split
# presumably differs between runs - confirm, and consider seeding it if the
# reported numbers need to be reproducible.
dls = ImageDataLoaders.from_name_func(path, files, label_func, item_tfms=Resize(64))

In [None]:
# Load a ResNet-34 with pretrained weights from timm.
pretrained_resnet_34 = timm.create_model('resnet34', pretrained=True)

# Replace the classification head *before* the model is handed to the Learner,
# so the Learner only ever sees the final architecture. The layer sizes are
# derived from the model and the data instead of being hard-coded (512 -> 2):
#   - in_features comes from the head being replaced,
#   - dls.c is the number of classes fastai inferred from the labels.
pretrained_resnet_34.fc = nn.Linear(pretrained_resnet_34.fc.in_features, dls.c)

learn = Learner(dls, pretrained_resnet_34, metrics=accuracy)

# Fine-tune for 5 epochs with the one-cycle schedule at a max learning rate of 1e-3.
learn.fit_one_cycle(5, 1e-3)

epoch,train_loss,valid_loss,accuracy,time
0,0.536314,0.409817,0.817997,00:03
1,0.327518,0.273514,0.889716,00:03
2,0.203315,0.206001,0.916779,00:03
3,0.116644,0.180373,0.932341,00:03
4,0.073957,0.182942,0.925575,00:03


In [None]:
# Configure post-training quantization of the fine-tuned model.
quantizer = Quantizer(
    backend="x86",
    method="static",    # Static quantization; calibrated with `calibration_dl` below
    verbose=True,       # See detailed output for debugging
    use_per_tensor=False  # NOTE(review): presumably selects per-channel quantization - confirm in fasterai docs
)

# Quantize your model, using batches from the training DataLoader for calibration
quantized_model = quantizer.quantize(
    model=learn.model,
    calibration_dl=dls.train,
)

Preparing model for static quantization with x86 backend
Calibrating with up to 100 samples


Calibrating: 100%|██████████| 1/1 [00:01<00:00,  1.33s/it]


Converting to quantized model
Quantization complete


In [None]:
#| include: false
from tqdm import tqdm

def get_model_size(model):
    """Return the serialized size of `model.state_dict()` in megabytes (1 MB = 1e6 bytes).

    The state dict is written with `torch.save` into a private temporary
    directory, measured, and removed again. Nothing is written to the current
    working directory, so an existing `temp.p` there is never overwritten or
    deleted, and the temporary file is cleaned up even if saving fails.

    Args:
        model: any object exposing `state_dict()` (e.g. a `torch.nn.Module`).

    Returns:
        float: size of the saved state dict in MB.
    """
    import os
    import tempfile

    # The file keeps the name "temp.p" inside the scratch directory so the
    # saved archive is laid out as before; only its location changes.
    with tempfile.TemporaryDirectory() as scratch_dir:
        checkpoint_path = os.path.join(scratch_dir, "temp.p")
        torch.save(model.state_dict(), checkpoint_path)
        return os.path.getsize(checkpoint_path) / 1e6  # Size in MB
    
def compute_validation_accuracy(model, valid_dataloader, device=None):
    """Return the top-1 classification accuracy of `model`, as a percentage.

    Args:
        model: module called as `model(inputs)`; the predicted class is the
            argmax of its output along dimension 1.
        valid_dataloader: iterable yielding `(inputs, labels)` batches.
        device: device to evaluate on (a `torch.device` or anything
            `torch.device()` accepts). Defaults to CPU when omitted.
            NOTE(review): the quantized model built in this notebook
            presumably only runs on CPU, hence the default - confirm.

    Returns:
        float: accuracy in the range [0, 100]. Returns 0.0 when the
        dataloader yields no samples (instead of dividing by zero).

    Side effects:
        `model` is switched to eval mode and moved to `device`.
    """
    # Set the model to evaluation mode
    model.eval()

    # Honour the caller's device; fall back to CPU only when none is given.
    device = torch.device('cpu') if device is None else torch.device(device)

    # Move model to the selected device
    model = model.to(device)

    # Tracking correct predictions and total samples
    total_correct = 0
    total_samples = 0

    # Disable gradient computation for efficiency
    with torch.no_grad():
        for inputs, labels in tqdm(valid_dataloader):
            # Re-wrap the inputs as a plain torch.Tensor before moving them.
            # NOTE(review): presumably this drops fastai's tensor subclass so
            # the quantized ops see a regular tensor - confirm.
            inputs = torch.Tensor(inputs).to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(inputs)

            # Predicted class = index of the largest score per sample
            predicted = outputs.argmax(dim=1)

            # Update counters
            total_samples += labels.size(0)
            total_correct += (predicted == labels).sum().item()

    # An empty dataloader has no defined accuracy; report 0.0 rather than crash.
    if total_samples == 0:
        return 0.0

    # Compute accuracy as a percentage
    return (total_correct / total_samples) * 100

In [None]:
# Compare the serialized state-dict size (in MB) before and after quantization.
print(f'Size of the original model: {get_model_size(learn.model):.2f} MB')
print(f'Size of the quantized model: {get_model_size(quantized_model):.2f} MB')

Size of the original model: 85.27 MB
Size of the quantized model: 21.51 MB


In [None]:
# Accuracy (in %) of the quantized model on the validation DataLoader;
# no device is passed, so the helper evaluates on CPU.
compute_validation_accuracy(quantized_model, dls.valid)

100%|██████████| 24/24 [00:02<00:00,  9.64it/s]


89.37753721244925