In [1]:
import os
import torch
import torchvision
from d2l import torch as d2l
%matplotlib inline
import matplotlib.pyplot as plt
from models.utils import *
import torch
import torch.nn as nn
import torch.nn.functional as F
import models

In [2]:
import torch
from torch.utils.data import Dataset
import torchvision.transforms.v2 as transforms
from sklearn.model_selection import train_test_split
import numpy as np

class VOCSegmentation(Dataset):
    def __init__(self, images_path = '/mnt/beegfs/ksanka/semiFL/images.npy', masks_path = '/mnt/beegfs/ksanka/semiFL/masks.npy',split = 'train'):
        self.split = split
        self.train_indices, self.test_indices = train_test_split(range(2913), test_size=0.3, random_state=42)

        if split == 'train':
            # self.transform =  transforms.Compose([
            #                     #transforms.ColorJitter(contrast=0.5),
            #                     transforms.RandomHorizontalFlip(0.5),
            #                     transforms.RandomRotation(30),
            #                     #transforms.ToTensor()
            #                     #transforms.CenterCrop(480),
            #                 ])
            self.transform = None
            self.ind = np.array(self.train_indices)

        else:
            self.transform = None#transforms.ToTensor()
            self.ind = np.array(self.test_indices)
        self.images_path = images_path
        self.data = np.load(images_path, mmap_mode='r')#[self.ind,:]
        self.target = np.load(masks_path, mmap_mode='r')#[self.ind,:]
        self.id = np.arange(len(self.target))#[self.ind,:]
        self.other = {'id': self.id}
        self.target_size = self.target[0].shape

    def __len__(self):        
        return len(self.ind)
        # if self.split == 'train':
        #     return len(self.train_indices)
        # elif self.split == "test":
        #     return len(self.test_indices)
        # else:
        #     raise ValueError("split must be either train or test")
    
        #return len(self.images)  # Assuming images and masks have the same length
    
    def __repr__(self):
        fmt_str = 'Dataset {}\nSize: {}\nRoot: {}\nSplit: {}\nTransforms: {}'.format(
            self.__class__.__name__, self.__len__(), self.images_path, self.split,self.transform.__repr__())
        return fmt_str

    def __getitem__(self, idx):
        idx = self.ind[idx]
        image = self.data[idx]
        #print(idx)
        mask = self.target[idx]
        other = {'id' : self.id[idx]}
        #id = self.id[idx]

        # Apply transformations if provided
        inp = {**other, 'data':torch.tensor(image), 'target' : torch.tensor(mask)}
        if self.transform:
            inp = self.transform(inp)
            #image = self.transform(image)
            #image,mask = self.transform(image,mask)
            #mask = self.transform(mask)
        return inp



class SimpleDataset(Dataset):
    def __init__(self):
        #self.num_samples = num_samples
        self.data = torch.rand(100,3,128,128)
        self.target = torch.rand(100,1,128,128)
        self.transform = None
        self.ind = np.arange(100)


    def __len__(self):
        return len(self.ind)
        #return self.num_samples

    def __getitem__(self, idx):
        didx = self.ind[idx]
        data = torch.tensor(self.data[didx,:])
        tgt = self.target[idx]
        inp = {'data': data , 'target':tgt}
        #print(data.shape,tgt.shape)
        if self.transform:
            inp = self.transform(inp)
            #return {'data':self.transform(torch.tensor(self.data[didx,:])), 'target':self.target[idx]} # 'target':self.transform(self.target[idx])
        return inp

device = 'cuda'

ds_train = VOCSegmentation(split = 'train')
ds_test = VOCSegmentation(split = 'test')

trainloader = torch.utils.data.DataLoader(ds_train, batch_size=12, shuffle=True)
testloader = torch.utils.data.DataLoader(ds_test, batch_size=12, shuffle=True)


In [3]:
from torchmetrics.classification import Dice

dice1 = Dice(average='macro',num_classes = 21,ignore_index = 0).to('cuda')
dice2 = Dice(average='macro',num_classes = 21).to('cuda')
def compute_iou(pred_mask,true_mask):
    '''
    Compute dice score by ignoring background
    '''
    
    #print(pred_mask.shape,true_mask.shape)
    pred_mask = torch.argmax(pred_mask,1)
    return dice1(pred_mask.to('cuda'), true_mask.to('cuda')).item()
    # intersection = np.logical_and(true_mask, pred_mask)
    # union = np.logical_or(true_mask, pred_mask)
    # iou_score = np.sum(intersection) / np.sum(union)
    # return iou_score

def compute_dice_coefficient(pred_mask,true_mask):
    pred_mask = torch.argmax(pred_mask,1)
    #dice = Dice(average='macro',num_classes = 21).to(pred_mask.device)
    return dice2(pred_mask.to('cuda'), true_mask.to('cuda')).item()


def compute_pixel_accuracy(pred_mask,true_mask):
    pred_mask = torch.argmax(pred_mask,1)
    correct_pixels = torch.sum(true_mask == pred_mask)
    total_pixels = true_mask.view(-1).size()[0]
    pixel_accuracy = correct_pixels / total_pixels
    return pixel_accuracy.item()


In [4]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', weights='DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1').to(device)


Using cache found in /home/ksanka/.cache/torch/hub/pytorch_vision_v0.10.0


In [5]:
model.eval()

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Se

In [6]:
quantized_full_model = torch.quantization.quantize_dynamic(model, dtype=torch.qint8)


In [7]:
criterion = nn.CrossEntropyLoss()
#optimizer = torch.optim.SGD(model.classifier.parameters(), lr=0.03)

def eval(model,test):
    metrics = {'loss':[],'pa':[],'dice':[],'iou':[]}
    model.eval()
    with torch.no_grad():
        for i,batch in enumerate(test):
            inputs,labels=  batch['data'].to(device),batch['target'].to(device)
            #input,labels = next(iter(trainloader))
            #input,labels = input.to(device),labels.to(device)
            outputs = model(inputs)['out']
            #print(outputs.keys())
            loss = criterion(outputs, labels)
            #pred = torch.max(torch.softmax(outputs,1))
            pa = compute_pixel_accuracy(outputs,labels)
            dice = compute_dice_coefficient(outputs,labels)
            iou = compute_iou(outputs,labels)
            metrics['loss'].append(loss)
            metrics['pa'].append(pa)
            metrics['dice'].append(dice)
            metrics['iou'].append(iou)
        print(f"mean loss = {torch.mean(torch.tensor(metrics['loss']))} , mean pa = {torch.mean(torch.tensor(metrics['pa']))}, mean dice = {torch.mean(torch.tensor(metrics['dice']))}")
            
            
        


In [10]:
batch = next(iter(testloader))

In [11]:
inp = batch['data'].to('cuda')
inp.shape

torch.Size([12, 3, 256, 256])

In [12]:
model = model.to('cuda')

In [8]:
eval(model,testloader)

mean loss = 0.27691471576690674 , mean pa = 0.9041972756385803, mean dice = 0.6236916184425354


In [9]:
eval(model,testloader)

mean loss = 0.27780094742774963 , mean pa = 0.9039435982704163, mean dice = 0.6350855231285095


In [39]:
eval(quantized_full_model,testloader)

mean loss = 0.27666449546813965 , mean pa = 0.9039584398269653, mean dice = 0.7177595496177673


In [16]:
op = model(batch['data'].to('cuda'))

In [17]:
torch.cuda.reset_peak_memory_stats()
op = model(batch['data'].to('cuda'))
max_memory_allocated = torch.cuda.max_memory_allocated() / (1024 ** 2)  # MB
max_memory_reserved = torch.cuda.max_memory_reserved() / (1024 ** 2)  # MB

In [18]:
print(max_memory_allocated)

6727.205078125


In [19]:
max_memory_reserved

6988.0

In [36]:
op = model_quantized(batch['data'].to('cuda'))


In [29]:
op = quantized_model_conv(batch['data'].to('cuda'))


In [37]:
torch.cuda.reset_peak_memory_stats()
op = model_quantized(batch['data'].to('cuda'))
max_memory_allocated = torch.cuda.max_memory_allocated() / (1024 ** 2)  # MB
max_memory_reserved = torch.cuda.max_memory_reserved() / (1024 ** 2)  # MB

In [38]:
max_memory_allocated, max_memory_reserved

(7213.92138671875, 7494.0)

In [25]:
quantized_model_conv = torch.quantization.quantize_dynamic(
    model, {torch.nn.Conv2d}, dtype=torch.qint8
)

In [13]:
import torch
import torchvision.models as models
from torch.profiler import profile, record_function, ProfilerActivity

In [32]:
import torch.nn as nn
from torch.quantization import QuantStub, DeQuantStub

# Define mapping for dynamic quantization of all layers
dynamic_quantization_mapping = {
    nn.Conv1d: nn.quantized.Conv1d,
    nn.Conv2d: nn.quantized.Conv2d,
    nn.Conv3d: nn.quantized.Conv3d,
    nn.ConvTranspose1d: nn.quantized.ConvTranspose1d,
    nn.ConvTranspose2d: nn.quantized.ConvTranspose2d,
    nn.ConvTranspose3d: nn.quantized.ConvTranspose3d,
    nn.Linear: nn.quantized.Linear,
    nn.ReLU: QuantStub,  # Replace with quantized version
    nn.ReLU6: QuantStub,
    nn.BatchNorm2d: nn.quantized.BatchNorm2d,
    nn.BatchNorm3d: nn.quantized.BatchNorm3d,
    # Add more layers as needed
}

In [33]:
model_fp32 = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', weights='DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1').to(device)

model_quantized = torch.quantization.quantize_dynamic(
    model_fp32, mapping=dynamic_quantization_mapping, dtype=torch.qint8
)

Using cache found in /home/ksanka/.cache/torch/hub/pytorch_vision_v0.10.0


In [34]:
model_quantized

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Se

In [40]:
model_quantized.state_dict()

OrderedDict([('backbone.conv1.weight',
              tensor([[[[ 2.5033e-03,  1.0390e-03, -5.1337e-03,  ..., -3.5699e-02,
                         -4.7453e-02, -6.7587e-02],
                        [-6.3906e-03,  6.0558e-03,  3.3751e-02,  ...,  2.9455e-02,
                         -5.6533e-03, -1.3758e-02],
                        [-3.6212e-03, -5.5701e-04,  1.1559e-02,  ...,  1.1007e-01,
                          9.0378e-02,  8.8566e-02],
                        ...,
                        [ 2.8753e-02,  3.5205e-02, -4.1636e-03,  ..., -1.2127e-01,
                         -8.6888e-02, -4.9437e-02],
                        [ 4.6666e-02,  8.7159e-02,  1.1415e-01,  ...,  5.0452e-02,
                          2.9312e-03, -3.2462e-02],
                        [-7.2365e-02, -4.5505e-02, -1.0001e-02,  ...,  5.0157e-02,
                          4.8468e-02,  6.5176e-03]],
              
                       [[ 2.8648e-03,  2.0979e-02,  3.7938e-02,  ...,  3.5522e-02,
                       

In [None]:
import torch
import torch.quantization

# Load the pre-trained model
model  = torch.hub.load('pytorch/vision:v0.10.0', 'deeplabv3_resnet50', weights='DeepLabV3_ResNet50_Weights.COCO_WITH_VOC_LABELS_V1').to(device).eval()

modules_to_fuse = [
    ['conv1', 'bn1', ],
    ['conv2', 'bn2', ],
    # Add more module patterns to fuse
]

# torch.quantization.fuse_modules(model.backbone, modules_to_fuse,inplace=True)
# torch.quantization.fuse_modules(model.classifier, modules_to_fuse,inplace=True)



# Set the quantization configuration
qconfig = torch.quantization.get_default_qconfig('qnnpack')
#qnnpack, fbgemm
# Prepare the model for static quantization
model.qconfig = qconfig
torch.quantization.prepare(model, inplace=True)


# Calibrate the quantization parameters on a representative dataset
calibration_loader = trainloader

for batch in calibration_loader:
    inputs, labels = batch['data'].to(device),batch['target'].to(device)
    model(inputs)

# # Convert the model to quantized form
torch.quantization.convert(model, inplace=True)

# # Optionally, offload the quantized model to CPU for inference
# model = model.to('cpu')

# # Evaluate the quantized model
# eval_loader = your_evaluation_dataloader
# accuracy = 0.0
# for batch in eval_loader:
#     inputs, labels = batch
#     outputs = model(inputs)
#     accuracy += compute_accuracy(outputs, labels)

# print(f'Quantized model accuracy: {accuracy / len(eval_loader)}')


Using cache found in /home/ksanka/.cache/torch/hub/pytorch_vision_v0.10.0


In [63]:
def get_fusible_module_patterns(model):
    fusible_patterns = []
    for name, module in model.named_modules():
        if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
            conv_bn_relu = []
            parent_name = name
            for child_name, child_module in module.named_children():
                full_child_name = parent_name + '.' + child_name if parent_name else child_name
                if isinstance(child_module, torch.nn.BatchNorm2d):
                    conv_bn_relu.append(full_child_name)
                elif isinstance(child_module, torch.nn.ReLU):
                    conv_bn_relu.append(full_child_name)
                    fusible_patterns.append(conv_bn_relu)
                    break
    return fusible_patterns


In [69]:
model.classifier

DeepLabHead(
  (0): ASPP(
    (convs): ModuleList(
      (0): Sequential(
        (0): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
      (1): ASPPConv(
        (0): Conv2d(2048, 256, kernel_size=(3, 3), stride=(1, 1), padding=(12, 12), dilation=(12, 12), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
      (2): ASPPConv(
        (0): Conv2d(2048, 256, kernel_size=(3, 3), stride=(1, 1), padding=(24, 24), dilation=(24, 24), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
      (3): ASPPConv(
        (0): Conv2d(2048, 256, kernel_size=(3, 3), stride=(1, 1), padding=(36, 36), dilation=(36, 36), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_

In [68]:
model_params = list(model.named_parameters())

# Print names and parameters
for name, param in model_params:
    print(f"Name: {name}, Shape: {param.shape}")



Name: backbone.conv1.weight, Shape: torch.Size([64, 3, 7, 7])
Name: backbone.conv1.bias, Shape: torch.Size([64])
Name: backbone.layer1.0.conv1.weight, Shape: torch.Size([64, 64, 1, 1])
Name: backbone.layer1.0.bn1.weight, Shape: torch.Size([64])
Name: backbone.layer1.0.bn1.bias, Shape: torch.Size([64])
Name: backbone.layer1.0.conv2.weight, Shape: torch.Size([64, 64, 3, 3])
Name: backbone.layer1.0.bn2.weight, Shape: torch.Size([64])
Name: backbone.layer1.0.bn2.bias, Shape: torch.Size([64])
Name: backbone.layer1.0.conv3.weight, Shape: torch.Size([256, 64, 1, 1])
Name: backbone.layer1.0.bn3.weight, Shape: torch.Size([256])
Name: backbone.layer1.0.bn3.bias, Shape: torch.Size([256])
Name: backbone.layer1.0.downsample.0.weight, Shape: torch.Size([256, 64, 1, 1])
Name: backbone.layer1.0.downsample.1.weight, Shape: torch.Size([256])
Name: backbone.layer1.0.downsample.1.bias, Shape: torch.Size([256])
Name: backbone.layer1.1.conv1.weight, Shape: torch.Size([64, 256, 1, 1])
Name: backbone.layer1.1

In [71]:
import torch
from torch.quantization import fuse_modules

def get_fusible_module_patterns(model):
    fusible_patterns = []
    for name, module in model.named_modules():
        if isinstance(module, (torch.nn.Conv2d, torch.nn.Linear)):
            conv_bn_relu = []
            parent_name = name
            for child_name, child_module in module.named_children():
                full_child_name = parent_name + '.' + child_name if parent_name else child_name
                if isinstance(child_module, (torch.nn.BatchNorm2d, torch.nn.ReLU)):
                    conv_bn_relu.append(full_child_name)
                if len(conv_bn_relu) == 2 and isinstance(child_module, torch.nn.BatchNorm2d):
                    conv_bn_relu.append('relu')
                    fusible_patterns.append(conv_bn_relu)
                    break
                elif len(conv_bn_relu) == 1 and isinstance(child_module, torch.nn.ReLU):
                    conv_bn_relu.append(full_child_name)
                    fusible_patterns.append(conv_bn_relu)
                    break
    return fusible_patterns

# Example usage
# model = YourModel()
fusible_patterns = get_fusible_module_patterns(model)
print(fusible_patterns)


[]


In [79]:
import torch
from torchvision.models.segmentation import deeplabv3_resnet50

# Load the pretrained DeepLab model
model = deeplabv3_resnet50(pretrained=True)


In [76]:
from torch.quantization import fuse_modules

def fuse_model(model):
    for module_name, module in model.named_children():
        if isinstance(module, torch.nn.Sequential):
            for idx in range(len(module)):
                if isinstance(module[idx], torch.nn.Conv2d) and idx + 2 < len(module):
                    if isinstance(module[idx + 1], torch.nn.BatchNorm2d) and isinstance(module[idx + 2], torch.nn.ReLU):
                        # Fuse Conv, BN, ReLU
                        fuse_modules(module, [str(idx), str(idx + 1), str(idx + 2)], inplace=True)
        else:
            fuse_model(module)  # Recursively fuse children layers

# Apply fusion to the model
fuse_model(model.backbone)


In [80]:
print(model)

DeepLabV3(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Se

In [46]:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

In [54]:
print_size_of_model(model)

Size (MB): 168.351276


In [55]:
!quota -vs

Disk quotas for user ksanka (uid 381380): 
     Filesystem   space   quota   limit   grace   files   quota   limit   grace
10.1.1.10:/home  32340M  39936M  40960M            299k       0       0        


In [None]:
python3.8 train_classifier_ssfl.py --data_name CIFAR10 --model_name wresnet28x2 --control_name 1000_fix@0.85_5_0.3_non-iid-d-0.3_5-5_0.5_0_1