<a href="https://colab.research.google.com/github/AQEEL-AWAN2362/NLP-Tutorial/blob/main/quantization_ptq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
# importing dependencies
import torch
from torch import nn  # for loss
import torch.nn.functional as F   # for optimizer
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np


In [21]:
x,y = make_moons(n_samples=5000,noise=0.5,random_state=42)

In [22]:
print(x,  "shape of x: " ,  x.shape)

[[ 0.64527536  1.38251014]
 [ 0.14514823 -0.32157033]
 [ 0.11945131  0.41631146]
 ...
 [ 0.6360473   0.66530771]
 [ 1.61542641 -0.24249711]
 [ 0.10599548  1.0899585 ]] shape of x:  (5000, 2)


In [23]:
# import matplotlib.pyplot as plt

# X, y = make_moons(n_samples=500, noise=0.2, random_state=42)
# plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr')
# plt.title("make_moons Dataset")
# plt.xlabel("Feature 1")
# plt.ylabel("Feature 2")
# plt.show()

In [24]:
# normalize the data
scaler = StandardScaler()
x = scaler.fit_transform(x)
x= torch.tensor(x, dtype=torch.float32)  # making ready for torch type
y= torch.tensor(y.reshape(-1,1), dtype=torch.float32)

In [25]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [26]:
# initiating model with forward pass
class Big_MLP(nn.Module):
 def __init__(self):
    super().__init__()
    self.fc1 = nn.Linear(2,128)
    self.fc2 = nn.Linear(128,64)
    self.fc3 = nn.Linear(64,64)
    self.fc4 = nn.Linear(64,32)
    self.fc5 = nn.Linear(32,16)
    self.fc6 = nn.Linear(16,8)
    self.fc7 = nn.Linear(8,1)

 def forward(self,x):    # getting output of each layer by (input*weights)activation
  x= F.relu(self.fc1(x))
  x= F.relu(self.fc2(x))
  x= F.relu(self.fc3(x))
  x= F.relu(self.fc4(x))
  x= F.relu(self.fc5(x))
  x= F.relu(self.fc6(x))
  x= torch.sigmoid(self.fc7(x))
  return x

In [27]:
model_fp32= Big_MLP()

In [28]:
# checking all learnable parameters(weights and biases) and name
# for name, param in model_fp32.named_parameters():
#   print(name, param.shape)

In [29]:
# backword pass (calculating loss and optimizing it with optimizer e.g adamW)
loss_fn = nn.BCELoss()
# optimizer = torch.optim.AdamW(model_fp32.parameters(), lr=0.001)
optimizer = torch.optim.Adam(model_fp32.parameters(), lr=0.01)

for epoch in range(2000):
    model_fp32.train()  # train the model
    y_pred = model_fp32(x_train)
    loss = loss_fn(y_pred, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # ✅ Must be indented inside the loop
    if epoch % 1000 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.4f}") # tensor(0.3478, grad_fn=<MseLossBackward>) so to convert it into python float we use .item()


Epoch 0 | Loss: 0.6937
Epoch 1000 | Loss: 0.3649


In [30]:
# evaluate the model
def accuracy(model, x, y):
    model.eval()
    with torch.no_grad():

        preds = model(x)

        preds = (preds > 0.5).float()

        return (preds == y).float().mean().item() # We compare predicted == y_train to count how many are correct.


print(f"Accuracy: {accuracy(model_fp32, x_test, y_test) * 100:.2f}%")





Accuracy: 81.10%


 # **QUANTIZED THE MODEL NOW**


In [31]:
from torch.quantization import quantize_dynamic # weights only quantized

In [32]:
# real dynamic quantization
model_int8 = quantize_dynamic(
    model_fp32,   # original model
    {nn.Linear},  # Which layers to quantize
    dtype= torch.qint8   # quantized type
)

For migrations of users: 
1. Eager mode quantization (torch.ao.quantization.quantize, torch.ao.quantization.quantize_dynamic), please migrate to use torchao eager mode quantize_ API instead 
2. FX graph mode quantization (torch.ao.quantization.quantize_fx.prepare_fx,torch.ao.quantization.quantize_fx.convert_fx, please migrate to use torchao pt2e quantization API instead (prepare_pt2e, convert_pt2e) 
3. pt2e quantization has been migrated to torchao (https://github.com/pytorch/ao/tree/main/torchao/quantization/pt2e) 
see https://github.com/pytorch/ao/issues/2259 for more details
  model_int8 = quantize_dynamic(


In [33]:
model_int8

Big_MLP(
  (fc1): DynamicQuantizedLinear(in_features=2, out_features=128, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
  (fc2): DynamicQuantizedLinear(in_features=128, out_features=64, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
  (fc3): DynamicQuantizedLinear(in_features=64, out_features=64, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
  (fc4): DynamicQuantizedLinear(in_features=64, out_features=32, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
  (fc5): DynamicQuantizedLinear(in_features=32, out_features=16, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
  (fc6): DynamicQuantizedLinear(in_features=16, out_features=8, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
  (fc7): DynamicQuantizedLinear(in_features=8, out_features=1, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
)

In [34]:
print("INT8 Quantized Accuracy:", accuracy(model_int8, x_test, y_test))

INT8 Quantized Accuracy: 0.6520000100135803


In [38]:
# reduced parameters after dynamic quantization
print("Before:", sum(p.numel() for p in model_fp32.parameters()))
print("After:", sum(p.numel() for p in model_int8.parameters()))

Before: 15553
After: 0


In [35]:
import os
torch.save(model_fp32.state_dict(), "model_fp32.pt")
torch.save(model_int8.state_dict(), "model_dynamic_int8.pt")

In [36]:
print("FP32 model size (MB):", os.path.getsize("model_fp32.pt") / 1e6)
print("INT8 model size (MB):", os.path.getsize("model_dynamic_int8.pt") / 1e6)

FP32 model size (MB): 0.067317
INT8 model size (MB): 0.026373
