<a href="https://colab.research.google.com/github/JericN/rice-disease-classifier/blob/main/results_inference_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install --quite -U fvcore

In [1]:
from transformers import AutoModelForImageClassification
import torch
from torch.profiler import profile, record_function, ProfilerActivity

In [2]:
from fvcore.nn import FlopCountAnalysis
from fvcore.nn import ActivationCountAnalysis

In [275]:
# model_name = "SodaXII/vit-base-patch16-224_rice-leaf-disease-augmented-v4_v5_fft"
# model_name = "SodaXII/mobilevit-small_rice-leaf-disease-augmented-v4_v5_fft"
# model_name = "SodaXII/efficientformer_l1.snap_dist_in1k_rice-leaf-disease-augmented-v4_v5_fft"
model_name = "SodaXII/deit-base-patch16-224_rice-leaf-disease-augmented-v4_v5_fft"

model = AutoModelForImageClassification.from_pretrained(model_name).to("cuda")
model.eval()

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermed

In [268]:
inputs = torch.randn(1, 3, 224, 224)  # Adjust input size as needed

# Perform a forward pass to collect activation counts
activation_counts = ActivationCountAnalysis(model, inputs)

# Perform a forward pass to collect FLOP counts
flop_counts = FlopCountAnalysis(model, inputs)

# Print the results
print("FLOP Counts:", flop_counts.total())



FLOP Counts: 16866650880


In [262]:
# Create CUDA events for timing
start_event = torch.cuda.Event(enable_timing=True)
end_event = torch.cuda.Event(enable_timing=True)

# Load the model and move it to GPU
# model_name = "SodaXII/vit-base-patch16-224_rice-leaf-disease-augmented-v4_v5_fft"
# model_name = "SodaXII/mobilevit-small_rice-leaf-disease-augmented-v4_v5_fft"
# model_name = "SodaXII/efficientformer_l1.snap_dist_in1k_rice-leaf-disease-augmented-v4_v5_fft"
# model_name = "SodaXII/deit-base-patch16-224_rice-leaf-disease-augmented-v4_v5_fft"
model_name = "SodaXII/swin-base-patch4-window7-224_rice-leaf-disease-augmented-v4_v5_fft"

model = AutoModelForImageClassification.from_pretrained(model_name).to("cuda")
model.eval()

# Create a dummy input tensor and move it to GPU
input_tensor = torch.randn(1, 3, 224, 224).to("cuda")

# # Warm-up runs (optional but recommended)
# for _ in range(100):
#     with torch.no_grad():
#         _ = model(input_tensor)

# Reset peak memory statistics before inference
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()



# Record the start event
start_event.record()

# Run inference
with torch.no_grad():
    _ = model(input_tensor)

# Record the end event
end_event.record()

# Wait for the events to be recorded
torch.cuda.synchronize()

# Calculate elapsed time in milliseconds
inference_time_ms = start_event.elapsed_time(end_event)

# Retrieve peak memory usage
peak_memory = torch.cuda.max_memory_allocated()

# Print the results
print(f"Inference time: {inference_time_ms:.2f} ms")
print(f"Peak GPU memory usage: {peak_memory / (1024 ** 2):.2f} MB")

Inference time: 25.11 ms
Peak GPU memory usage: 1135.70 MB


In [301]:
inputs = torch.randn(1, 3, 224, 224).to("cuda")  # Adjust input size as needed

with profile(
    activities=[ ProfilerActivity.CPU, ProfilerActivity.CUDA],
    with_flops=True,
    profile_memory=True,
) as prof:
  with record_function("model_inference"):
    model(inputs)

# print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=500))
data = prof.profiler
print(data)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  --------------------------------------------------------------------------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls                                                                      Input Shapes  Total KFLOPs  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ---------------------------------------------