<a href="https://colab.research.google.com/github/Ruthuvikas/Transformer/blob/main/Model_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

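## Quantization of a tensor from FP32 to FP16

Casting a `float32` tensor to `float16` (half precision) halves its memory footprint at the cost of a small rounding error; the cell below prints both tensors, their sizes in bytes, and the largest absolute difference between them.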

In [13]:
import torch

def quantize_example():
    """Cast a random FP32 tensor to FP16 and print a side-by-side report.

    For a freshly sampled 3x4 ``float32`` tensor and for its ``float16`` copy,
    prints the dtype, the values and the storage size in bytes. Finishes by
    printing the largest absolute element-wise difference between the two.
    The function takes no arguments and returns ``None``.
    """

    def describe(heading, tensor):
        # Storage size = number of elements * bytes per element.
        size_in_bytes = tensor.nelement() * tensor.element_size()
        print(heading)
        print("Dtype:", tensor.dtype)
        print("Values:", tensor)
        print("Memory usage (bytes):", size_in_bytes)

    # Full-precision reference tensor.
    fp32_tensor = torch.randn(3, 4, dtype=torch.float32)
    describe("Original Tensor (FP32):", fp32_tensor)

    # Half-precision copy of the same values.
    fp16_tensor = fp32_tensor.half()
    describe("\nQuantized Tensor (FP16):", fp16_tensor)

    # Upcast the FP16 copy again so the subtraction is carried out in FP32.
    rounding_error = (fp32_tensor - fp16_tensor.float()).abs().max()
    print("\nPrecision Comparison:")
    print("Max absolute difference:", rounding_error.item())


# Execute the demo once when the cell runs
quantize_example()

Original Tensor (FP32):
Dtype: torch.float32
Values: tensor([[ 0.3416,  1.3372,  0.1772,  0.8209],
        [-1.0045,  0.2167, -0.8730, -0.4546],
        [-0.4399, -1.0325, -0.2025, -2.4493]])
Memory usage (bytes): 48

Quantized Tensor (FP16):
Dtype: torch.float16
Values: tensor([[ 0.3416,  1.3369,  0.1772,  0.8208],
        [-1.0049,  0.2167, -0.8730, -0.4546],
        [-0.4399, -1.0322, -0.2025, -2.4492]], dtype=torch.float16)
Memory usage (bytes): 24

Precision Comparison:
Max absolute difference: 0.0003980398178100586


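## Pruning

The cell below applies L1 unstructured pruning to 30% of the weights of the first linear layer (`fc1`), trains for a few steps with the pruning mask in place, and finally removes the pruning re-parametrization.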

In [1]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
import torch.optim as optim

# Define a simple neural network
class SimpleNet(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        in_features: Size of each input sample. Defaults to 10.
        hidden_features: Width of the hidden layer. Defaults to 50.
        out_features: Size of each output sample. Defaults to 1.

    The defaults give the 10 -> 50 -> 1 architecture, so ``SimpleNet()``
    with no arguments builds exactly that network.
    """

    def __init__(self, in_features: int = 10, hidden_features: int = 50,
                 out_features: int = 1):
        super().__init__()
        # fc1 is created before fc2 so that seeded weight initialisation
        # draws random numbers in a fixed order.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply ``fc1``, a ReLU, then ``fc2`` and return the result.

        Args:
            x: Input whose last dimension equals ``in_features``.

        Returns:
            Tensor whose last dimension equals ``out_features``.
        """
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# Build the network together with its MSE loss and a plain SGD optimizer.
model = SimpleNet()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Toy batch: 5 random samples of 10 features, each with one regression target.
x = torch.randn(5, 10)
y = torch.randn(5, 1)

# Show fc1's weight matrix as initialised, before any pruning is applied.
print("Before pruning:", model.fc1.weight, sep="\n")

# L1 unstructured pruning of 30% of fc1's weights.
prune.l1_unstructured(model.fc1, "weight", amount=0.3)

# Show the weight after pruning and the mask that was attached to the layer.
print("After pruning:", model.fc1.weight, sep="\n")
print("Pruned mask:", model.fc1.weight_mask, sep="\n")

# Five SGD steps on the toy batch while the pruning mask is in place.
for epoch in range(5):
    outputs = model(x)
    loss = criterion(outputs, y)
    # Clear gradients left over from the previous step before back-propagating.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

# Drop the pruning re-parametrization from fc1 and show the resulting weight.
prune.remove(model.fc1, "weight")
print("After removing pruning:", model.fc1.weight, sep="\n")


Before pruning:
Parameter containing:
tensor([[-7.0054e-02,  9.8435e-02,  7.0166e-03,  2.2374e-01,  9.3290e-02,
         -1.5916e-01,  1.4244e-01,  6.4996e-02, -1.1214e-01,  1.5585e-01],
        [-2.0116e-01,  2.6917e-01,  3.4827e-02, -6.3969e-02, -9.6950e-02,
          2.0849e-01, -1.3459e-01, -1.0771e-01,  3.6854e-02,  2.5579e-01],
        [ 1.5962e-02, -1.4013e-01,  2.3676e-01, -8.4859e-02,  5.5687e-02,
         -2.8981e-02,  1.1660e-01, -1.6555e-01,  3.0106e-01,  9.1430e-02],
        [-2.1639e-01, -1.7182e-01, -8.1013e-02, -3.0091e-01,  1.2596e-01,
         -8.3394e-02,  2.7196e-01,  1.6595e-01, -1.4153e-01,  1.9702e-01],
        [-3.0436e-01,  1.7374e-01,  1.3569e-01, -2.0500e-01, -3.1268e-01,
          1.7759e-01,  1.2436e-01,  3.5675e-02,  2.6614e-01, -2.2584e-01],
        [-2.2002e-01, -2.3496e-01,  2.9489e-01, -2.7864e-01, -2.2025e-02,
          1.5598e-01,  2.5190e-01, -1.1827e-01,  1.9689e-01, -2.3368e-01],
        [ 5.9743e-04, -1.2529e-01,  2.1133e-01, -2.8118e-01,  2.5549