# Implementation: Post-Training Quantization (PyTorch)

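**Goal**: Shrink the model by converting a trained FP32 network to INT8 with post-training static quantization (prepare → calibrate → convert).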

In [None]:
import torch
import torch.nn as nn
import torch.quantization

# 1. Define Model
class M(nn.Module):
    """Toy network: one 10->10 linear layer between a quant and a dequant stub.

    The stubs mark where tensors enter and leave the region that the
    quantization workflow is allowed to rewrite.
    """

    def __init__(self):
        super().__init__()
        # Entry marker for the quantized region.
        self.quant = torch.quantization.QuantStub()
        # The only layer carrying weights.
        self.fc = nn.Linear(10, 10)
        # Exit marker for the quantized region.
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Run ``x`` through quant stub, linear layer, and dequant stub in order."""
        entered = self.quant(x)
        projected = self.fc(entered)
        return self.dequant(projected)

model_fp32 = M()

# 2. Prepare
# Post-training quantization works on an inference-mode model.
model_fp32.eval()
# 'fbgemm' selects the default qconfig for the x86 quantized backend.
model_fp32.qconfig = torch.quantization.get_default_qconfig('fbgemm')
# prepare() returns a copy instrumented for calibration; model_fp32 is kept.
model_prepared = torch.quantization.prepare(model_fp32)

# 3. Calibrate (Pass some dummy data so it learns the range of values)
# Feed the calibration tensor defined here instead of a second, unrelated
# random tensor. nn.Linear(10, 10) only requires the last dimension to be 10.
input_fp32 = torch.randn(4, 1, 10, 10)
with torch.no_grad():  # calibration only needs forward passes
    model_prepared(input_fp32)

# 4. Convert
model_int8 = torch.quantization.convert(model_prepared)

print("Model converted to Int8.")
print(model_int8)

## Conclusion
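The model is now quantized to INT8. Note that it was configured with the `fbgemm` backend, which targets x86 CPUs; for deployment on ARM-based mobile devices, use the `qnnpack` qconfig instead before preparing and converting.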