In [None]:
import os
import torch
import torch.nn as nn
from torch.quantization import quantize_dynamic

# Quantize the Linear layers of a trained model to int8 and save the result.
#
# Precondition: `model` must already exist as an instance of the architecture
# whose weights are stored in best_model.pth.
# Example: model = YourModelClass()

# Device the full-precision model ends up on (GPU when available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory that holds the checkpoint and receives the quantized output.
# `__file__` only exists when this runs as a script; in a notebook or
# interactive session it raises NameError, so fall back to the working dir.
try:
    current_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    current_dir = os.getcwd()

# Load the model state dict onto the CPU first: the quantization step below
# has to run on CPU tensors regardless of which device is selected.
# NOTE(security): torch.load unpickles the file -- only load checkpoints
# from a trusted source.
model_path = os.path.join(current_dir, "best_model.pth")
model.load_state_dict(torch.load(model_path, map_location="cpu"))

# Dynamic quantization only targets CPU backends, so make sure the model is
# on the CPU even if it had been moved to a GPU before this point.
model.to("cpu")

# Apply dynamic quantization to the model
# Quantize the Linear layers to torch.qint8. With the default inplace=False
# this returns a converted copy and leaves `model` itself in full precision.
quantized_model = quantize_dynamic(
    model, {nn.Linear}, dtype=torch.qint8
)

# Move the full-precision model to the selected device, as before
model.to(device)

# Save the quantized model to a new .pth file
quantized_model_path = os.path.join(current_dir, "quantized_model.pth")
torch.save(quantized_model.state_dict(), quantized_model_path)

print(f"Quantized model saved to {quantized_model_path}")