##Optimizing License Plate Recognition - Pytorch model

In [1]:
!pip install torch torchvision onnx onnxruntime tvm

Collecting onnx
  Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting onnxruntime
  Downloading onnxruntime-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting tvm
  Downloading tvm-1.0.0.tar.gz (5.4 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting appdirs (from tvm)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting docopt (from tvm)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting inform (from tvm)
  Downloading inform-1.32-py3-none-any.whl.metadata (11 kB)
Collecting quantiphy (from tvm)
  Downloading quantiphy-2.20-py3-none-any.whl.metadata (7.7 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metad

In [2]:
import torch
print("GPU Available:", torch.cuda.is_available())

GPU Available: True


In [3]:
!python3 -m  pip install mlc-ai-cpu -f https://mlc.ai/wheels

Looking in links: https://mlc.ai/wheels
Collecting mlc-ai-cpu
  Downloading https://github.com/mlc-ai/package/releases/download/v0.9.dev0/mlc_ai_cpu-0.17.2-cp310-cp310-manylinux_2_28_x86_64.whl (185.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m185.8/185.8 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: mlc-ai-cpu
Successfully installed mlc-ai-cpu-0.17.2


In [4]:
!git clone https://github.com/sirius-ai/LPRNet_Pytorch.git
%cd LPRNet_Pytorch

Cloning into 'LPRNet_Pytorch'...
remote: Enumerating objects: 1071, done.[K
remote: Counting objects: 100% (34/34), done.[K
remote: Compressing objects: 100% (12/12), done.[K
remote: Total 1071 (delta 25), reused 22 (delta 22), pack-reused 1037 (from 1)[K
Receiving objects: 100% (1071/1071), 20.04 MiB | 16.16 MiB/s, done.
Resolving deltas: 100% (35/35), done.
/content/LPRNet_Pytorch


In [5]:
!python test_LPRNet.py --test_img_dirs ./data/test --pretrained_model ./weights/Final_LPRNet_model.pth

Successful to build network!
  lprnet.load_state_dict(torch.load(args.pretrained_model))
load pretrained model successful!
[Info] Test Accuracy: 0.897 [897:63:40:1000]
[Info] Test Speed: 0.0024281809329986573s 1/1000]


In [6]:
import tvm
import torch.nn as nn
import torch

## Original LPRNet model

In [7]:
class small_basic_block(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(small_basic_block, self).__init__()
        self.block = nn.Sequential(
            nn.Conv2d(ch_in, ch_out // 4, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(3, 1), padding=(1, 0)),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(1, 3), padding=(0, 1)),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out, kernel_size=1),
        )
    def forward(self, x):
        return self.block(x)
class LPRNet(nn.Module):
    def __init__(self, lpr_max_len, phase, class_num, dropout_rate):
        super(LPRNet, self).__init__()
        self.phase = phase
        self.lpr_max_len = lpr_max_len
        self.class_num = class_num
        self.backbone = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1), # 0
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),  # 2
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1)),
            small_basic_block(ch_in=64, ch_out=128),    # *** 4 ***
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),  # 6
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(2, 1, 2)),
            small_basic_block(ch_in=64, ch_out=256),   # 8
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 10
            small_basic_block(ch_in=256, ch_out=256),   # *** 11 ***
            nn.BatchNorm2d(num_features=256),   # 12
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(4, 1, 2)),  # 14
            nn.Dropout(dropout_rate),
            nn.Conv2d(in_channels=64, out_channels=256, kernel_size=(1, 4), stride=1),  # 16
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 18
            nn.Dropout(dropout_rate),
            nn.Conv2d(in_channels=256, out_channels=class_num, kernel_size=(13, 1), stride=1), # 20
            nn.BatchNorm2d(num_features=class_num),
            nn.ReLU(),  # *** 22 ***
        )
        self.container = nn.Sequential(
            nn.Conv2d(in_channels=448+self.class_num, out_channels=self.class_num, kernel_size=(1, 1), stride=(1, 1)),
            # nn.BatchNorm2d(num_features=self.class_num),
            # nn.ReLU(),
            # nn.Conv2d(in_channels=self.class_num, out_channels=self.lpr_max_len+1, kernel_size=3, stride=2),
            # nn.ReLU(),
        )

    def forward(self, x):
        keep_features = list()
        for i, layer in enumerate(self.backbone.children()):
            x = layer(x)
            if i in [2, 6, 13, 22]: # [2, 4, 8, 11, 22]
                keep_features.append(x)

        global_context = list()
        for i, f in enumerate(keep_features):
            if i in [0, 1]:
                f = nn.AvgPool2d(kernel_size=5, stride=5)(f)
            if i in [2]:
                f = nn.AvgPool2d(kernel_size=(4, 10), stride=(4, 2))(f)
            f_pow = torch.pow(f, 2)
            f_mean = torch.mean(f_pow)
            f = torch.div(f, f_mean)
            global_context.append(f)

        x = torch.cat(global_context, 1)
        x = self.container(x)
        logits = torch.mean(x, dim=2)

        return logits

def build_lprnet(lpr_max_len=8, phase=False, class_num=66, dropout_rate=0.5):

    Net = LPRNet(lpr_max_len, phase, class_num, dropout_rate)

    if phase == "train":
        return Net.train()
    else:
        return Net.eval()

## Setting up test function -
(similar to test_LPRNet.py from the LPRNet repository)

In [8]:
from data.load_data import CHARS, CHARS_DICT, LPRDataLoader
from PIL import Image, ImageDraw, ImageFont
from model.LPRNet import build_lprnet
# import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import *
from torch import optim
import torch.nn as nn
import numpy as np
import argparse
import torch
import time
import cv2
import os

class Config:
    def __init__(self):
        self.img_size = [94, 24]
        self.test_img_dirs = "./data/test"
        self.dropout_rate = 0
        self.lpr_max_len = 8
        self.test_batch_size = 100
        self.phase_train = False
        self.num_workers = 8
        self.cuda = False
        self.show = False
        self.pretrained_model = './weights/Final_LPRNet_model.pth'

args = Config()


def collate_fn(batch):
    imgs = []
    labels = []
    lengths = []
    for _, sample in enumerate(batch):
        img, label, length = sample
        imgs.append(torch.from_numpy(img))
        labels.extend(label)
        lengths.append(length)
    labels = np.asarray(labels).flatten().astype(np.float32)

    return (torch.stack(imgs, 0), torch.from_numpy(labels), lengths)

def test(module):
    test_img_dirs = os.path.expanduser(args.test_img_dirs)
    test_dataset = LPRDataLoader(test_img_dirs.split(','), args.img_size, args.lpr_max_len)
    Greedy_Decode_Eval_tvm(module, test_dataset, args)


def Greedy_Decode_Eval_tvm(module, datasets, args):
    # TestNet = Net.eval()
    epoch_size = len(datasets) // args.test_batch_size
    batch_iterator = iter(DataLoader(datasets, args.test_batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn))

    Tp = 0
    Tn_1 = 0
    Tn_2 = 0
    t1 = time.time()
    for i in range(epoch_size):
        # load train data
        images, labels, lengths = next(batch_iterator)
        start = 0
        targets = []
        for length in lengths:
            label = labels[start:start+length]
            targets.append(label)
            start += length
        targets = np.array([el.numpy() for el in targets])
        imgs = images.numpy().copy()

        if args.cuda:
            images = Variable(images.cuda())
        else:
            images = Variable(images)

        module.set_input(input_name, tvm.nd.array(images.numpy()))
        module.run()
        tvm_output = module.get_output(0).asnumpy()
        prebs = tvm_output
        preb_labels = list()
        for i in range(prebs.shape[0]):
            preb = prebs[i, :, :]
            preb_label = list()
            for j in range(preb.shape[1]):
                preb_label.append(np.argmax(preb[:, j], axis=0))
            no_repeat_blank_label = list()
            pre_c = preb_label[0]
            if pre_c != len(CHARS) - 1:
                no_repeat_blank_label.append(pre_c)
            for c in preb_label: # dropout repeate label and blank label
                if (pre_c == c) or (c == len(CHARS) - 1):
                    if c == len(CHARS) - 1:
                        pre_c = c
                    continue
                no_repeat_blank_label.append(c)
                pre_c = c
            preb_labels.append(no_repeat_blank_label)
        for i, label in enumerate(preb_labels):
            if len(label) != len(targets[i]):
                Tn_1 += 1
                continue
            if (np.asarray(targets[i]) == np.asarray(label)).all():
                Tp += 1
            else:
                Tn_2 += 1
    Acc = Tp * 1.0 / (Tp + Tn_1 + Tn_2)
    print("[Info] Test Accuracy: {} [{}:{}:{}:{}]".format(Acc, Tp, Tn_1, Tn_2, (Tp+Tn_1+Tn_2)))
    t2 = time.time()
    print("[Info] Test Speed: {}s 1/{}]".format((t2 - t1) / len(datasets), len(datasets)))


## Original Model Size

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.LPRNet import build_lprnet  # Ensure this matches your file path
from onnxruntime.quantization import quantize_dynamic, QuantType

# Initialize the original model and wrap it
lprnet_model = build_lprnet()  # Load the LPRNet model

# Now you can export the wrapped model to ONNX
dummy_input = torch.randn(1, 3, 24, 94,device='cuda')  # Dummy input for ONNX export
lprnet_model.to(dummy_input.device)
onnx_model_path = './weights/lprnet_model.onnx'
torch.onnx.export(lprnet_model, dummy_input, onnx_model_path, weight_type=QuantType.QInt8)
print(f'Model exported to: {onnx_model_path}')

import os

# Get the file size of the ONNX model
onnx_size = os.path.getsize(onnx_model_path) / (1024 * 1024)  # Size in MB
print(f"ONNX model size: {onnx_size:.2f} MB")


Model exported to: ./weights/lprnet_model.onnx
ONNX model size: 1.68 MB


## MLC Optimization

Setting up model to perform MLC optimizations

In [11]:
import tvm
from tvm import relay
import onnx
from tvm.contrib import graph_executor

dummy_input = torch.randn(1, 3, 24, 94)
lprnet = build_lprnet(lpr_max_len=8, phase=False, class_num=68, dropout_rate=0.5)
lprnet.load_state_dict(torch.load("./weights/Final_LPRNet_model.pth",  map_location=torch.device('cpu')))
lprnet.eval()

relay_model = torch.jit.trace(lprnet,dummy_input).eval()
relay_model.save("jit_traced_lprnet.pt")

# Load the model
relay_model = torch.jit.load("jit_traced_lprnet.pt")
relay_model.eval()

# Define the input shape for the model
input_name = "input0"
input_shape = (100, 3, 24, 94)
shape_dict = [(input_name,input_shape)]

# Convert ONNX model to TVM Relay format
relay_mod, params = relay.frontend.from_pytorch(relay_model, shape_dict)

# Specify target (e.g., NVIDIA GPU or CPU)
target = "llvm"  # Use "llvm" for CPU
dev = tvm.cuda(0) if target == "cuda" else tvm.cpu()


  lprnet.load_state_dict(torch.load("./weights/Final_LPRNet_model.pth",  map_location=torch.device('cpu')))


In [12]:
from tvm import relay

# Start with type inference and simplification
mod = relay.transform.InferType()(relay_mod)
mod = relay.transform.SimplifyInference()(mod)

# Perform advanced optimizations
mod = relay.transform.FoldScaleAxis()(mod)
mod = relay.transform.FuseOps()(mod)
mod = relay.transform.AlterOpLayout()(mod)

mod = relay.transform.EliminateCommonSubexpr()(mod)

# Final cleanup and memory optimization
mod = relay.transform.DeadCodeElimination()(mod)

In [13]:
# Define the target device
target = "llvm"
dev = tvm.cuda(0) if target == "cuda" else tvm.cpu()

# Compile the model
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

module = graph_executor.GraphModule(lib["default"](dev))
test(module)



[Info] Test Accuracy: 0.903 [903:57:40:1000]
[Info] Test Speed: 0.03870338368415833s 1/1000]


##Perform TVM Autotune

In [14]:
from tvm.autotvm.tuner import XGBTuner
from tvm import autotvm

In [15]:
import onnx
model = onnx.load(onnx_model_path)

# Get input names
input_all = [node.name for node in model.graph.input]
input_initializer = [node.name for node in model.graph.initializer]
net_feed_input = list(set(input_all) - set(input_initializer))

print("Input names:", net_feed_input)

Input names: ['input.1']


In [16]:
from tvm.autotvm.tuner import XGBTuner
from tvm.autotvm import task, measure
import tvm.auto_scheduler as auto_scheduler

# Extract tasks for tuning
tasks = tvm.autotvm.task.extract_from_program(
    relay_mod["main"], target=target, params=params
)

# Check the extracted tasks
print(f"Extracted {len(tasks)} tasks for tuning.")

Extracted 13 tasks for tuning.


In [17]:
from tvm.autotvm.measure.measure_methods import LocalBuilder, LocalRunner

# Define the measure option for local execution
measure_option = tvm.autotvm.measure_option(
    builder=LocalBuilder(),
    runner=LocalRunner(number=10,repeat=1, timeout=10, min_repeat_ms=100)
)

tuning_option = {
    "log_filename": "autotvm_tuning.log",
    "tuner": "xgb",  # Use XGBoost tuner
    "n_trial": 20,  # Reduce number of trials
    "early_stopping": 50,  # Stop earlier if no improvements
}



# Perform tuning
for i, tsk in enumerate(tasks):
    print(f"Tuning task {i + 1}/{len(tasks)}")
    tuner = XGBTuner(tsk)
    tuner.tune(
        n_trial=tuning_option["n_trial"],
        early_stopping=tuning_option["early_stopping"],

        measure_option=measure_option,
        callbacks=[
            tvm.autotvm.callback.log_to_file(tuning_option["log_filename"]),
            tvm.autotvm.callback.progress_bar(tuning_option["n_trial"]),
        ],
    )

# Apply the best tuning results
with tvm.autotvm.apply_history_best(tuning_option["log_filename"]):
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(relay_mod, target=target, params=params)

# Save the graph and parameters
with open("graph.json", "w") as f:
    f.write(lib.get_graph_json())  # Save the model graph

with open("params.params", "wb") as f:
    f.write(tvm.runtime.save_param_dict(params))  # Save the model parameters


# Save the compiled module (optional)
lib.export_library("optimized_model.tar")



Tuning task 1/13
 Current/Best:    6.18/  17.26 GFLOPS | Progress: (20/20) | 46.29 s Done.
Tuning task 2/13
 Current/Best:    3.34/  21.32 GFLOPS | Progress: (20/20) | 48.14 s Done.
Tuning task 3/13
 Current/Best:    2.19/  16.09 GFLOPS | Progress: (20/20) | 63.63 s Done.
Tuning task 4/13
 Current/Best:   14.13/  22.47 GFLOPS | Progress: (20/20) | 45.70 s Done.
Tuning task 5/13
 Current/Best:   10.23/  18.65 GFLOPS | Progress: (20/20) | 74.39 s Done.
Tuning task 6/13
 Current/Best:    6.98/  20.78 GFLOPS | Progress: (20/20) | 31.26 s Done.
Tuning task 7/13
 Current/Best:   13.43/  16.27 GFLOPS | Progress: (20/20) | 85.41 s Done.
Tuning task 8/13
 Current/Best:    6.71/  21.99 GFLOPS | Progress: (20/20) | 56.99 s Done.
Tuning task 9/13
 Current/Best:    9.10/  14.97 GFLOPS | Progress: (20/20) | 104.99 s Done.
Tuning task 10/13
 Current/Best:   13.96/  19.62 GFLOPS | Progress: (20/20) | 88.28 s Done.
Tuning task 11/13
 Current/Best:    8.15/  19.56 GFLOPS | Progress: (20/20) | 100.92 s D

In [18]:
with autotvm.apply_history_best(tuning_option["log_filename"]):
    with tvm.transform.PassContext(opt_level=3, config={}):
        lib = relay.build(mod, target=target, params=params)

dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))

In [19]:
test(module)



[Info] Test Accuracy: 0.899 [899:61:40:1000]
[Info] Test Speed: 0.03080963134765625s 1/1000]


In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.LPRNet import build_lprnet  # Ensure this matches your file path

# Initialize the original model and wrap it
lprnet_model = build_lprnet()  # Load the LPRNet model

# Now you can export the wrapped model to ONNX
dummy_input = torch.randn(1, 3, 24, 94,device='cuda')  # Dummy input for ONNX export
lprnet_model.to(dummy_input.device)
mlc_optimized_onnx_model_path = './optimized_model.tar'
torch.onnx.export(lprnet_model, dummy_input, mlc_optimized_onnx_model_path, weight_type=QuantType.QInt8)
print(f'Model exported to: {mlc_optimized_onnx_model_path}')

import os

# Get the file size of the ONNX model
mlc_onnx_size = os.path.getsize(mlc_optimized_onnx_model_path) / (1024 * 1024)  # Size in MB
print(f"ONNX model size: {mlc_onnx_size:.2f} MB")


Model exported to: ./optimized_model.tar
ONNX model size: 1.68 MB


## MODEL OPTIMIZATION

In [21]:
# Display the script content with line numbers
!cat -n test_LPRNet.py

     1	# -*- coding: utf-8 -*-
     2	# /usr/bin/env/python3
     3	
     4	'''
     5	test pretrained model.
     6	Author: aiboy.wei@outlook.com .
     7	'''
     8	
     9	from data.load_data import CHARS, CHARS_DICT, LPRDataLoader
    10	from PIL import Image, ImageDraw, ImageFont
    11	from model.LPRNet import build_lprnet
    12	# import torch.backends.cudnn as cudnn
    13	from torch.autograd import Variable
    14	import torch.nn.functional as F
    15	from torch.utils.data import *
    16	from torch import optim
    17	import torch.nn as nn
    18	import numpy as np
    19	import argparse
    20	import torch
    21	import time
    22	import cv2
    23	import os
    24	
    25	def get_parser():
    26	    parser = argparse.ArgumentParser(description='parameters to train net')
    27	    parser.add_argument('--img_size', default=[94, 24], help='the image size')
    28	    parser.add_argument('--test_img_dirs', default="./data/test", help='the test images path')
    29	    parse

In [22]:
file_path = "test_LPRNet.py"
with open(file_path, "r") as file:
    lines = file.readlines()

with open(file_path, "w") as file:
    inside_finally = False
    for line in lines:
        if line.strip() == 'finally:':
            inside_finally = True
            file.write(line)  # write the finally line
        elif inside_finally and line.strip() == '':
            inside_finally = False  # end of finally block
            file.write(line)
        elif inside_finally:
            if "cv2.destroyAllWindows()" in line:
                # Replace cv2.destroyAllWindows() with pass, ensuring proper indentation
                file.write('         pass\n')  # Ensuring 'pass' is indented correctly
            else:
                # Indent other lines inside finally block
                file.write('    ' + line)
        else:
            file.write(line)


In [23]:
# Re-display the script to ensure the change is applied
!cat -n test_LPRNet.py

     1	# -*- coding: utf-8 -*-
     2	# /usr/bin/env/python3
     3	
     4	'''
     5	test pretrained model.
     6	Author: aiboy.wei@outlook.com .
     7	'''
     8	
     9	from data.load_data import CHARS, CHARS_DICT, LPRDataLoader
    10	from PIL import Image, ImageDraw, ImageFont
    11	from model.LPRNet import build_lprnet
    12	# import torch.backends.cudnn as cudnn
    13	from torch.autograd import Variable
    14	import torch.nn.functional as F
    15	from torch.utils.data import *
    16	from torch import optim
    17	import torch.nn as nn
    18	import numpy as np
    19	import argparse
    20	import torch
    21	import time
    22	import cv2
    23	import os
    24	
    25	def get_parser():
    26	    parser = argparse.ArgumentParser(description='parameters to train net')
    27	    parser.add_argument('--img_size', default=[94, 24], help='the image size')
    28	    parser.add_argument('--test_img_dirs', default="./data/test", help='the test images path')
    29	    parse

In [24]:
!python test_LPRNet.py --test_img_dirs ./data/test --pretrained_model ./weights/Final_LPRNet_model.pth

Successful to build network!
  lprnet.load_state_dict(torch.load(args.pretrained_model))
load pretrained model successful!
[Info] Test Accuracy: 0.901 [901:57:42:1000]
[Info] Test Speed: 0.0008513927459716797s 1/1000]


In [25]:
import torch
from model.LPRNet import build_lprnet
from data.load_data import CHARS, CHARS_DICT, LPRDataLoader
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the LPRNet model
lprnet = build_lprnet(lpr_max_len=8, phase=False, class_num=len(CHARS), dropout_rate=0.5)
lprnet.to(device)
weights_path='/content/LPRNet_Pytorch/weights/Final_LPRNet_model.pth'
# Load pre-trained weights
lprnet.load_state_dict(torch.load(weights_path,weights_only=True))
print("Model loaded successfully!")

Model loaded successfully!


1. PRUNING

In [26]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
import torchvision
import torchvision.transforms as transforms
from imutils import paths
import numpy as np
import random
import cv2
import os
from torch.utils.data import Dataset
import time


# Pruning Function
def apply_pruning(model, pruning_percentage=0.3, device='cuda'):
    model.to(device)
    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
            print(f"Pruning layer: {name}")
            prune.l1_unstructured(module, name="weight", amount=pruning_percentage)
            prune.remove(module, 'weight')  # Make pruning permanent
    print("Pruning complete!")
    return model

# Character Set for License Plates
CHARS = ['京', '沪', '津', '渝', '冀', '晋', '蒙', '辽', '吉', '黑',
         '苏', '浙', '皖', '闽', '赣', '鲁', '豫', '鄂', '湘', '粤',
         '桂', '琼', '川', '贵', '云', '藏', '陕', '甘', '青', '宁',
         '新', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
         'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
         'W', 'X', 'Y', 'Z', 'I', 'O', '-']
CHARS_DICT = {char: i for i, char in enumerate(CHARS)}

# Dataset Class
class LPRDataLoader(Dataset):
    def __init__(self, img_dir, imgSize, lpr_max_len, PreprocFun=None):
        self.img_dir = img_dir
        self.img_paths = []
        for dir_path in img_dir:
            self.img_paths += [el for el in paths.list_images(dir_path)]
        random.shuffle(self.img_paths)
        self.img_size = imgSize
        self.lpr_max_len = lpr_max_len
        self.PreprocFun = PreprocFun if PreprocFun else self.transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, index):
        filename = self.img_paths[index]
        image = cv2.imread(filename)
        image = cv2.resize(image, self.img_size)
        image = self.PreprocFun(image)

        basename = os.path.basename(filename)
        imgname = basename.split("-")[0].split("_")[0]

        # Filter out invalid characters
        imgname = ''.join([c for c in imgname if c in CHARS_DICT])

        # Convert valid characters to labels
        label = [CHARS_DICT[c] for c in imgname]

        return torch.tensor(image, dtype=torch.float32), torch.tensor(label, dtype=torch.long), len(label)

    def transform(self, img):
        img = img.astype('float32')
        img -= 127.5
        img *= 0.0078125
        img = np.transpose(img, (2, 0, 1))  # Convert to (C, H, W)
        return img

# # Evaluation Function
# def greedy_decode_evaluate(model, test_loader, device='cuda'):
#     model.eval()
#     correct, total = 0, 0
#     with torch.no_grad():
#         for data in test_loader:
#             inputs, labels, lengths = data
#             inputs = inputs.to(device)
#             labels = labels.to(device)

#             # Perform model inference
#             outputs = model(inputs).cpu().numpy()

#             for i, preb in enumerate(outputs):
#                 preb_label = [np.argmax(preb[:, j]) for j in range(preb.shape[1])]

#                 # Greedy decoding: Remove repeated and blank tokens
#                 no_repeat_blank_label = []
#                 prev_c = preb_label[0]
#                 if prev_c != len(CHARS) - 1:
#                     no_repeat_blank_label.append(prev_c)
#                 for c in preb_label:
#                     if c != prev_c and c != len(CHARS) - 1:
#                         no_repeat_blank_label.append(c)
#                     prev_c = c

#                 # Compare decoded prediction with ground truth
#                 gt_label = labels[i][:lengths[i]].tolist()
#                 if no_repeat_blank_label == gt_label:
#                     correct += 1
#                 total += 1

#     # Calculate accuracy
#     accuracy = 100.0 * correct / total if total > 0 else 0.0
#     return accuracy


# Fix for shape mismatch while loading pre-trained weights
def load_pretrained_weights(model, pretrained_model_path):
    checkpoint = torch.load(pretrained_model_path,weights_only=True)
    model_state_dict = model.state_dict()

    # List of layers to update based on matching shapes
    for key in checkpoint.keys():
        if key in model_state_dict and checkpoint[key].shape == model_state_dict[key].shape:
            model_state_dict[key] = checkpoint[key]
        else:
            print(f"Skipping layer {key} due to shape mismatch")

    model.load_state_dict(model_state_dict)
    print("Pre-trained weights loaded successfully (with shape mismatch handling).")

import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image



# Function to Load Test Data
def get_test_loader(test_folder, batch_size=32):
    test_dataset = LPRDataLoader(test_folder.split(','),imgSize=(94, 24),lpr_max_len='8')
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)
    return test_loader


In [27]:
# Main Testing Code
if __name__ == '__main__':
    test_folder = "/content/LPRNet_Pytorch/data/test"  # Path to your test dataset
    pretrained_model_path = weights_path # Path to pretrained weights

    # Initialize your LPRNet model
    lprnet = build_lprnet(class_num=68, dropout_rate=0.5)  # Replace with LPRNet initialization

    # Load Pre-trained Weights
    load_pretrained_weights(lprnet, pretrained_model_path)

    # Get Test DataLoader
    test_loader = get_test_loader(test_folder, batch_size=100)

    # Prune the Model
    unstructured_pruning = apply_pruning(lprnet, pruning_percentage=0.3, device='cuda')

Pre-trained weights loaded successfully (with shape mismatch handling).
Pruning layer: backbone.0
Pruning layer: backbone.4.block.0
Pruning layer: backbone.4.block.2
Pruning layer: backbone.4.block.4
Pruning layer: backbone.4.block.6
Pruning layer: backbone.8.block.0
Pruning layer: backbone.8.block.2
Pruning layer: backbone.8.block.4
Pruning layer: backbone.8.block.6
Pruning layer: backbone.11.block.0
Pruning layer: backbone.11.block.2
Pruning layer: backbone.11.block.4
Pruning layer: backbone.11.block.6
Pruning layer: backbone.16
Pruning layer: backbone.20
Pruning layer: container.0
Pruning complete!


In [28]:
torch.save(unstructured_pruning.state_dict(), './weights/pruned_model_weights_trial2.pth')


##Speed After Pruning

In [29]:
! python test_LPRNet.py --pretrained_model ./weights/pruned_model_weights_trial2.pth

Successful to build network!
  lprnet.load_state_dict(torch.load(args.pretrained_model))
load pretrained model successful!
[Info] Test Accuracy: 0.885 [885:71:44:1000]
[Info] Test Speed: 0.0008252706527709961s 1/1000]


Model Size After Pruning

In [30]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.LPRNet import build_lprnet  # Ensure this matches your file path



# Now you can export the wrapped model to ONNX
dummy_input = torch.randn(1, 3, 24, 94,device='cuda')  # Dummy input for ONNX export
unstructured_pruning.to(dummy_input.device)
pruned_onnx_model_path = './weights/pruned_lprnet_model.onnx'
torch.onnx.export(unstructured_pruning, dummy_input, pruned_onnx_model_path, weight_type=QuantType.QInt8)
print(f'Model exported to: {pruned_onnx_model_path}')


Model exported to: ./weights/pruned_lprnet_model.onnx


In [31]:
# Get the file size of the ONNX model
unstructuredpruning_onnx_size = os.path.getsize(pruned_onnx_model_path) / (1024 * 1024)  # Size in MB
print(f"Pruned ONNX model size: {unstructuredpruning_onnx_size:.2f} MB")


Pruned ONNX model size: 1.71 MB


## Quantization

In [32]:
from torch.quantization import quantize_dynamic

def dynamic_quantize(lprnet):
  quantized_lprnet = quantize_dynamic(lprnet, {nn.Conv2d, nn.Linear}, dtype=torch.qint8)
  torch.save(quantized_lprnet.state_dict(), "quantized_lprnet.pth")
  return quantized_lprnet

In [33]:
# Main Testing Code
if __name__ == '__main__':
    test_folder = "/content/LPRNet_Pytorch/data/test"  # Path to your test dataset
    pretrained_model_path = weights_path # Path to pretrained weights

    # Initialize your LPRNet model
    lprnet = build_lprnet(class_num=68, dropout_rate=0.5)  # Replace with LPRNet initialization

    # Load Pre-trained Weights
    load_pretrained_weights(lprnet, pretrained_model_path)

    # Get Test DataLoader
    test_loader = get_test_loader(test_folder, batch_size=100)
    # Prune the Model
    dynamic_quantized = dynamic_quantize(lprnet)
    dynamic_quantized.to('cuda')

Pre-trained weights loaded successfully (with shape mismatch handling).


In [34]:
def fuse_lprnet_corrected(model):
    """
    Optimize the LPRNet model by fusing only valid layer combinations.

    Args:
        model (torch.nn.Module): The LPRNet model to optimize.

    Returns:
        torch.nn.Module: The optimized and fused LPRNet model.
    """

    def fuse_sequential_layers(sequential_block, block_name):
        """
        Fuse eligible layer combinations in a sequential block.

        Args:
            sequential_block (nn.Sequential): A sequential block of layers.
            block_name (str): The name of the block for debugging/logging.
        """
        valid_fusions = []
        for name, layer in sequential_block.named_children():
          if isinstance(layer, nn.Conv2d) or (
             valid_fusions and isinstance(layer, (nn.BatchNorm2d, nn.ReLU))
           ):
            valid_fusions.append(name)
          else:
            if len(valid_fusions) >= 2:
              torch.quantization.fuse_modules(sequential_block, valid_fusions, inplace=True)
              print(f"Fused layers in block {block_name}: {valid_fusions}")
            valid_fusions = []  # Reset
        if len(valid_fusions) >= 2:
          torch.quantization.fuse_modules(sequential_block, valid_fusions, inplace=True)
          print(f"Fused layers in block {block_name}: {valid_fusions}")

    def traverse_and_fuse(module, module_name=""):
        """
        Recursively traverse and apply fusion on modules.

        Args:
            module (torch.nn.Module): Current module to process.
            module_name (str): Parent module name for debugging/logging.
        """
        if isinstance(module, nn.Sequential):
            fuse_sequential_layers(module, module_name)
        elif hasattr(module, "children"):
            for name, child in module.named_children():
                full_name = f"{module_name}.{name}" if module_name else name
                traverse_and_fuse(child, full_name)

    # Fuse layers in backbone
    if hasattr(model, "backbone"):
        print("Fusing backbone layers...")
        traverse_and_fuse(model.backbone, "backbone")

    # Fuse layers in container (if present)
    if hasattr(model, "container"):
        print("Fusing container layers...")
        fuse_sequential_layers(model.container, "container")

    print("Fusion process completed.")
    return model


In [35]:
from torch.ao.quantization import prepare, convert
if __name__ == "__main__":
    # Initialize your LPRNet model
    lprnet = build_lprnet(class_num=68, dropout_rate=0.5)  # Replace with LPRNet initialization

    # Load Pre-trained Weights
    load_pretrained_weights(lprnet, pretrained_model_path)

    # Load the data
    test_loader = get_test_loader(test_folder, batch_size=100)
    lprnet.to('cuda')
    fused_lprnet = fuse_lprnet_corrected(lprnet)
    fused_lprnet.eval()
    fused_lprnet.qconfig = torch.quantization.get_default_qconfig('qnnpack')  # or 'qnnpack' for mobile
    torch.quantization.prepare(fused_lprnet, inplace=True)
    torch.quantization.convert(fused_lprnet, inplace=False)

    torch.save(fused_lprnet.state_dict(), "fused_lprnet_quantized_lprnet.pth")


Pre-trained weights loaded successfully (with shape mismatch handling).
Fusing backbone layers...
Fused layers in block backbone: ['0', '1', '2']
Fused layers in block backbone: ['16', '17', '18']
Fused layers in block backbone: ['20', '21', '22']
Fusing container layers...
Fusion process completed.




In [36]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.LPRNet import build_lprnet  # Ensure this matches your file path



# Now you can export the wrapped model to ONNX
dummy_input = torch.randn(1, 3, 24, 94,device='cuda')  # Dummy input for ONNX export
fused_lprnet.to(dummy_input.device)
quantized_onnx_model_path = './weights/fused_lprnet_quantized_lprnet.pth'
torch.onnx.export(fused_lprnet, dummy_input, quantized_onnx_model_path, weight_type=QuantType.QInt8)
print(f'Model exported to: {quantized_onnx_model_path}')


Model exported to: ./weights/fused_lprnet_quantized_lprnet.pth


In [37]:
# Get the file size of the ONNX model
quantized_onnx_size = os.path.getsize(quantized_onnx_model_path) / (1024 * 1024)  # Size in MB
print(f"ONNX model size: {quantized_onnx_size:.2f} MB")


ONNX model size: 1.71 MB


Speed after Quantization

In [38]:
! python test_LPRNet.py --pretrained_model ./quantized_lprnet.pth

Successful to build network!
  lprnet.load_state_dict(torch.load(args.pretrained_model))
load pretrained model successful!
[Info] Test Accuracy: 0.899 [899:60:41:1000]
[Info] Test Speed: 0.0008352820873260498s 1/1000]
