In [4]:
import os

import cv2
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.nn import CTCLoss

from dataset import Synth90kDataset, synth90k_collate_fn
from model import CRNN
from evaluate import evaluate
from config import train_config as config
import numpy as np

import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import timm

def train_batch(crnn, data, optimizer, criterion, device):
    crnn.train()
    images, targets, target_lengths = [d.to(device) for d in data]
    #print('image size')
    #print(images.size())
    logits = crnn(images)
    
    log_probs = torch.nn.functional.log_softmax(logits, dim=2)
    
    batch_size = images.size(0)
    input_lengths = torch.LongTensor([logits.size(0)] * batch_size)
    target_lengths = torch.flatten(target_lengths)
    '''print('batch logits log_probs targets input_len target_len')
    print(batch_size)
    print(logits.size())
    print(log_probs.size())
    print(targets.size())
    print(input_lengths.size())
    print(target_lengths.size())
    print('end')'''
    loss = criterion(log_probs, targets, input_lengths, target_lengths)

    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(crnn.parameters(), 5) # gradient clipping with 5
    optimizer.step()
    return loss.item()



In [2]:

epochs = config['epochs']
train_batch_size = config['train_batch_size']
eval_batch_size = config['eval_batch_size']
lr = config['lr']
show_interval = config['show_interval']
valid_interval = config['valid_interval']
save_interval = config['save_interval']
end_interval = 40000
cpu_workers = config['cpu_workers']
reload_checkpoint = config['reload_checkpoint']
valid_max_iter = config['valid_max_iter']

img_width = config['img_width']
img_height = config['img_height']
data_dir = config['data_dir']

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

valid_dataset = Synth90kDataset(root_dir=data_dir, mode='test',
                                img_height=img_height, img_width=img_width)
valid_dataset.texts

device: cuda
0
00
002101
01
01206
01206368166
02110
0800
0th
1
10
100
101
1026
106
11
111
114
12
120
122
1229
13
13th
14
1410
14th
15
1600
1601
1620
165
1668
17
1701
1715
1770
17th
180
180plus
18th
19
191
1946
195
1957
1996
1st
1th
2
20
2000
2002
2008
2009
2010
2011
2076
20p
2101
212
21st
22
220
2204
222
22nd
236
25
250
25th
26
28
29th
2c
2gb
2nd
2th
3
30
31
311
319
31st
32803
32nd
33
34
358
35th
36
360
368166
3rd
3th
4
439
4447
45th
48245
4gb
4th
4u2watch
4x4
5
50
500
507
511
52
527
53
54
547
55
557
571
5742z
59
5a
5b
5th
6
60
61
617
61to69
631
69
6th
7
705
75
758
794447
794455
7th
8
80
800
801
818
83
88
89
8th
9
900
905
908
911
95
99
9th
a
a11
a12
a133
aa
aaa
aachen
aae
aah
aaliyah
aardvark
aardvarks
aaron
ab
aba
aback
abacus
abacuses
abaft
abalone
abalones
abandon
abandoned
abandoning
abandonment
abandons
abase
abased
abasement
abases
abash
abashed
abashedly
abashes
abashing
abashment
abasing
abate
abated
abatement
abates
abating
abattoir
abattoirs
abbas
abbasid
abbaye
abbe
abbes
ab

['slinking',
 'remodelers',
 'chronographs',
 'impeaching',
 'discombobulated',
 'loots',
 'underpays',
 'flameproofed',
 'enticements',
 'ukase',
 'descanted',
 'coolidge',
 'doziness',
 'deliberations',
 'supremacy',
 'remodelers',
 'resoled',
 'percipience',
 'terrifyingly',
 'prizewinner',
 'elaborately',
 'hopscotched',
 'telnetted',
 'disproportional',
 'warming',
 'bookstores',
 'meter',
 'savageness',
 'poppadoms',
 'guises',
 'tbs',
 'receptacle',
 'mas',
 'gladioli',
 'gunships',
 'coxes',
 'dozy',
 'empowered',
 'puzzlers',
 'designating',
 'goldmine',
 'wrist',
 'prepped',
 'punchy',
 'belching',
 'tuber',
 'lavatorial',
 'hostesses',
 'bathtub',
 'halfway',
 'rearm',
 'blather',
 'schoolboy',
 'townes',
 'annotating',
 'histories',
 'glossies',
 'rationalizes',
 'remaster',
 'yourself',
 'pot',
 'waffled',
 'darth',
 'crestless',
 'finn',
 'analogies',
 'fatsos',
 'penises',
 'domesticates',
 'brews',
 'ruthenium',
 'surer',
 'offenses',
 'sirrah',
 'stomping',
 'sparsity'

In [2]:
import torch
torch.cuda.is_available()

True

In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

device: cuda


# original CRNN

In [9]:
num_class = len(Synth90kDataset.LABEL2CHAR) + 1
print(num_class)
model = CRNN(1, 32, 100, num_class,
            map_to_seq_hidden=config['map_to_seq_hidden'],
            rnn_hidden=config['rnn_hidden'],
            leaky_relu=config['leaky_relu'])
#model.load_state_dict(torch.load('D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobileVit_chars74k_2.pth'),strict=False)
dummy_input = torch.randn(5, 1, 32, 100)
dummy_output =model(dummy_input)
dummy_output.size()

37
torch.Size([24, 5, 512])


torch.Size([24, 5, 37])

In [4]:


epochs = config['epochs']
train_batch_size = config['train_batch_size']
eval_batch_size = config['eval_batch_size']
lr = config['lr']
show_interval = config['show_interval']
valid_interval = config['valid_interval']
save_interval = config['save_interval']
end_interval = 40000
cpu_workers = config['cpu_workers']
reload_checkpoint = config['reload_checkpoint']
valid_max_iter = config['valid_max_iter']

img_width = config['img_width']
img_height = config['img_height']
data_dir = config['data_dir']

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

train_dataset = Synth90kDataset(root_dir=data_dir, mode='train',
                                img_height=img_height, img_width=img_width)
valid_dataset = Synth90kDataset(root_dir=data_dir, mode='dev',
                                img_height=img_height, img_width=img_width)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=eval_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)

num_class = len(Synth90kDataset.LABEL2CHAR) + 1
crnn = CRNN(1, img_height, img_width, num_class,
            map_to_seq_hidden=config['map_to_seq_hidden'],
            rnn_hidden=config['rnn_hidden'],
            leaky_relu=config['leaky_relu'])
if reload_checkpoint:
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
crnn.to(device)

optimizer = optim.Adam(crnn.parameters())
criterion = CTCLoss(reduction='sum', zero_infinity=True)
criterion.to(device)

loss_history = []
valid_history = []


assert save_interval % valid_interval == 0
i = 1
for epoch in range(1, epochs + 1):
    print(f'epoch: {epoch}')
    tot_train_loss = 0.
    tot_train_count = 0
    for train_data in train_loader:
        loss = train_batch(crnn, train_data, optimizer, criterion, device)
        train_size = train_data[0].size(0)

        tot_train_loss += loss
        tot_train_count += train_size
        if i % show_interval == 0:
            print('train_batch_loss[', i, ']: ', loss / train_size)
            loss_history.append(loss / train_size)

        if i % valid_interval == 0:
            evaluation = evaluate(crnn, valid_loader, criterion,
                                  decode_method=config['decode_method'],
                                  beam_size=config['beam_size'])
            print('valid_evaluation: loss={loss}, acc={acc}'.format(**evaluation))
            valid_history.append(evaluation)

            if i % save_interval == 0:
                prefix = 'crnn'
                loss = evaluation['loss']
                save_model_path = os.path.join(config['checkpoints_dir'],
                                               f'{prefix}_{i:06}_loss{loss}.pt')
                torch.save(crnn.state_dict(), save_model_path)
                print('save model at ', save_model_path)
        if i % end_interval == 0:
            break
            
        i += 1

    print('train_loss: ', tot_train_loss / tot_train_count)




device: cuda
epoch: 1
train_batch_loss[ 10 ]:  28.225263595581055
train_batch_loss[ 20 ]:  26.03909683227539
train_batch_loss[ 30 ]:  26.25438690185547
train_batch_loss[ 40 ]:  24.54315757751465
train_batch_loss[ 50 ]:  24.86879539489746
train_batch_loss[ 60 ]:  25.75336456298828
train_batch_loss[ 70 ]:  25.201152801513672
train_batch_loss[ 80 ]:  24.07999038696289
train_batch_loss[ 90 ]:  24.856430053710938
train_batch_loss[ 100 ]:  25.97860336303711
train_batch_loss[ 110 ]:  24.96133804321289
train_batch_loss[ 120 ]:  25.163803100585938
train_batch_loss[ 130 ]:  25.19082260131836
train_batch_loss[ 140 ]:  25.744182586669922
train_batch_loss[ 150 ]:  23.48008918762207
train_batch_loss[ 160 ]:  25.617610931396484
train_batch_loss[ 170 ]:  24.29474639892578
train_batch_loss[ 180 ]:  24.26898956298828
train_batch_loss[ 190 ]:  24.93378448486328
train_batch_loss[ 200 ]:  23.547264099121094
train_batch_loss[ 210 ]:  24.399791717529297
train_batch_loss[ 220 ]:  24.572406768798828
train_batc

train_batch_loss[ 1830 ]:  7.729409694671631
train_batch_loss[ 1840 ]:  7.510095119476318
train_batch_loss[ 1850 ]:  6.580531120300293
train_batch_loss[ 1860 ]:  5.414301872253418
train_batch_loss[ 1870 ]:  7.960390090942383
train_batch_loss[ 1880 ]:  6.4452667236328125
train_batch_loss[ 1890 ]:  6.497190475463867
train_batch_loss[ 1900 ]:  5.6955366134643555
train_batch_loss[ 1910 ]:  6.519881725311279
train_batch_loss[ 1920 ]:  5.64987850189209
train_batch_loss[ 1930 ]:  6.749014854431152
train_batch_loss[ 1940 ]:  6.6213483810424805
train_batch_loss[ 1950 ]:  7.094559669494629
train_batch_loss[ 1960 ]:  6.80831241607666
train_batch_loss[ 1970 ]:  6.873623847961426
train_batch_loss[ 1980 ]:  4.381885051727295
train_batch_loss[ 1990 ]:  6.039064407348633
train_batch_loss[ 2000 ]:  7.4137420654296875
train_batch_loss[ 2010 ]:  5.8848090171813965
train_batch_loss[ 2020 ]:  7.135617256164551
train_batch_loss[ 2030 ]:  8.15733528137207
train_batch_loss[ 2040 ]:  5.879508018493652
train_ba

train_batch_loss[ 3650 ]:  3.845104932785034
train_batch_loss[ 3660 ]:  3.4121932983398438
train_batch_loss[ 3670 ]:  3.1982998847961426
train_batch_loss[ 3680 ]:  3.077939987182617
train_batch_loss[ 3690 ]:  3.9100661277770996
train_batch_loss[ 3700 ]:  3.0077247619628906
train_batch_loss[ 3710 ]:  6.722925186157227
train_batch_loss[ 3720 ]:  3.780386447906494
train_batch_loss[ 3730 ]:  3.017224073410034
train_batch_loss[ 3740 ]:  2.735743522644043
train_batch_loss[ 3750 ]:  3.922070026397705
train_batch_loss[ 3760 ]:  2.70420503616333
train_batch_loss[ 3770 ]:  3.466705322265625
train_batch_loss[ 3780 ]:  2.820164680480957
train_batch_loss[ 3790 ]:  3.1432433128356934
train_batch_loss[ 3800 ]:  4.80106258392334
train_batch_loss[ 3810 ]:  5.483813762664795
train_batch_loss[ 3820 ]:  3.9595232009887695
train_batch_loss[ 3830 ]:  3.122011184692383
train_batch_loss[ 3840 ]:  2.8177084922790527
train_batch_loss[ 3850 ]:  3.9246699810028076
train_batch_loss[ 3860 ]:  1.4353463649749756
tra

Evaluate:   8%|█████▊                                                               | 132/1568 [01:26<15:19,  1.56it/s]

Corrupted image for 467544


Evaluate:  35%|████████████████████████▍                                            | 554/1568 [06:10<11:03,  1.53it/s]

Corrupted image for 419552


Evaluate:  54%|█████████████████████████████████████▌                               | 853/1568 [09:30<07:45,  1.54it/s]

Corrupted image for 620308


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [17:25<00:00,  1.50it/s]


valid_evaluation: loss=3.571953409221544, acc=0.6003806989612998
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/crnn_004000_loss3.571953409221544.pt
train_batch_loss[ 4010 ]:  4.2076215744018555
train_batch_loss[ 4020 ]:  2.9297165870666504
train_batch_loss[ 4030 ]:  3.291121482849121
train_batch_loss[ 4040 ]:  3.550427198410034
train_batch_loss[ 4050 ]:  3.0700747966766357
train_batch_loss[ 4060 ]:  5.01856803894043
train_batch_loss[ 4070 ]:  4.333461761474609
train_batch_loss[ 4080 ]:  2.8569579124450684
train_batch_loss[ 4090 ]:  2.6238842010498047
train_batch_loss[ 4100 ]:  4.533390998840332
train_batch_loss[ 4110 ]:  2.377347707748413
train_batch_loss[ 4120 ]:  2.5413010120391846
train_batch_loss[ 4130 ]:  2.5092344284057617
train_batch_loss[ 4140 ]:  3.8716516494750977
train_batch_loss[ 4150 ]:  2.6577811241149902
train_batch_loss[ 4160 ]:  2.287950038909912
train_batch_loss[ 4170 ]:  4.658841133117676
train_batch_loss[ 4180 ]:  4.461198806762695
train_batch_loss

train_batch_loss[ 5770 ]:  4.74146842956543
train_batch_loss[ 5780 ]:  3.0368824005126953
train_batch_loss[ 5790 ]:  2.9059808254241943
train_batch_loss[ 5800 ]:  3.3094520568847656
train_batch_loss[ 5810 ]:  1.9302725791931152
train_batch_loss[ 5820 ]:  2.748979330062866
train_batch_loss[ 5830 ]:  2.749940872192383
train_batch_loss[ 5840 ]:  2.9494740962982178
train_batch_loss[ 5850 ]:  3.010589122772217
train_batch_loss[ 5860 ]:  2.7142255306243896
train_batch_loss[ 5870 ]:  2.7262911796569824
train_batch_loss[ 5880 ]:  2.4537596702575684
train_batch_loss[ 5890 ]:  2.9167089462280273
train_batch_loss[ 5900 ]:  2.7068872451782227
train_batch_loss[ 5910 ]:  2.326235294342041
train_batch_loss[ 5920 ]:  2.5870718955993652
train_batch_loss[ 5930 ]:  2.922668695449829
train_batch_loss[ 5940 ]:  2.4923362731933594
train_batch_loss[ 5950 ]:  3.0946624279022217
train_batch_loss[ 5960 ]:  1.828157663345337
train_batch_loss[ 5970 ]:  2.0469717979431152
train_batch_loss[ 5980 ]:  4.8946704864501

train_batch_loss[ 7570 ]:  2.5125112533569336
train_batch_loss[ 7580 ]:  2.5146067142486572
train_batch_loss[ 7590 ]:  2.4928388595581055
train_batch_loss[ 7600 ]:  0.9997672438621521
train_batch_loss[ 7610 ]:  1.7692066431045532
train_batch_loss[ 7620 ]:  3.2548012733459473
train_batch_loss[ 7630 ]:  2.9990715980529785
train_batch_loss[ 7640 ]:  3.189979314804077
train_batch_loss[ 7650 ]:  1.6716307401657104
train_batch_loss[ 7660 ]:  3.4509952068328857
train_batch_loss[ 7670 ]:  3.281249761581421
train_batch_loss[ 7680 ]:  3.4106783866882324
train_batch_loss[ 7690 ]:  1.3683762550354004
train_batch_loss[ 7700 ]:  2.48923397064209
train_batch_loss[ 7710 ]:  2.977865695953369
train_batch_loss[ 7720 ]:  2.619938611984253
train_batch_loss[ 7730 ]:  2.027721405029297
train_batch_loss[ 7740 ]:  1.6033554077148438
train_batch_loss[ 7750 ]:  3.127915859222412
train_batch_loss[ 7760 ]:  2.3028855323791504
train_batch_loss[ 7770 ]:  2.338704824447632
train_batch_loss[ 7780 ]:  2.54338717460632

Evaluate:  50%|██████████████████████████████████▊                                  | 790/1568 [09:04<09:02,  1.43it/s]

Corrupted image for 419552


Evaluate:  68%|██████████████████████████████████████████████▎                     | 1067/1568 [12:19<05:45,  1.45it/s]

Corrupted image for 467544


Evaluate:  86%|██████████████████████████████████████████████████████████▍         | 1348/1568 [15:37<02:45,  1.33it/s]

Corrupted image for 620308


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [18:11<00:00,  1.44it/s]


valid_evaluation: loss=2.5054689123784213, acc=0.705442898893033
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/crnn_008000_loss2.5054689123784213.pt
train_batch_loss[ 8010 ]:  3.535879611968994
train_batch_loss[ 8020 ]:  1.0796983242034912
train_batch_loss[ 8030 ]:  2.1877758502960205
train_batch_loss[ 8040 ]:  2.0861151218414307
train_batch_loss[ 8050 ]:  1.821050763130188
train_batch_loss[ 8060 ]:  2.2311558723449707
train_batch_loss[ 8070 ]:  2.6751394271850586
train_batch_loss[ 8080 ]:  1.4625279903411865
train_batch_loss[ 8090 ]:  3.0053889751434326
train_batch_loss[ 8100 ]:  2.5889267921447754
train_batch_loss[ 8110 ]:  1.1123661994934082
train_batch_loss[ 8120 ]:  2.40871262550354
train_batch_loss[ 8130 ]:  1.8892927169799805
train_batch_loss[ 8140 ]:  2.9272689819335938
train_batch_loss[ 8150 ]:  1.822685718536377
train_batch_loss[ 8160 ]:  3.094296932220459
train_batch_loss[ 8170 ]:  2.5953962802886963
train_batch_loss[ 8180 ]:  2.237755060195923
train_batch_

train_batch_loss[ 9780 ]:  2.556668519973755
train_batch_loss[ 9790 ]:  2.2597827911376953
train_batch_loss[ 9800 ]:  1.6333407163619995
train_batch_loss[ 9810 ]:  2.18269681930542
train_batch_loss[ 9820 ]:  2.357543468475342
train_batch_loss[ 9830 ]:  2.7919373512268066
train_batch_loss[ 9840 ]:  1.5661089420318604
train_batch_loss[ 9850 ]:  2.4640769958496094
train_batch_loss[ 9860 ]:  2.3886802196502686
train_batch_loss[ 9870 ]:  2.194545030593872
train_batch_loss[ 9880 ]:  2.302319049835205
train_batch_loss[ 9890 ]:  2.250154972076416
train_batch_loss[ 9900 ]:  3.5251026153564453
train_batch_loss[ 9910 ]:  1.8129574060440063
train_batch_loss[ 9920 ]:  2.483325958251953
train_batch_loss[ 9930 ]:  2.2635769844055176
train_batch_loss[ 9940 ]:  1.7653120756149292
train_batch_loss[ 9950 ]:  1.272425889968872
train_batch_loss[ 9960 ]:  2.3043341636657715
train_batch_loss[ 9970 ]:  1.4025273323059082
train_batch_loss[ 9980 ]:  2.5746755599975586
train_batch_loss[ 9990 ]:  2.01598310470581

train_batch_loss[ 11550 ]:  2.946181297302246
train_batch_loss[ 11560 ]:  2.310314655303955
train_batch_loss[ 11570 ]:  1.1443618535995483
train_batch_loss[ 11580 ]:  1.7575520277023315
train_batch_loss[ 11590 ]:  1.452408790588379
train_batch_loss[ 11600 ]:  1.178261160850525
train_batch_loss[ 11610 ]:  2.331188678741455
train_batch_loss[ 11620 ]:  3.09005069732666
train_batch_loss[ 11630 ]:  2.97591233253479
train_batch_loss[ 11640 ]:  2.1721396446228027
train_batch_loss[ 11650 ]:  1.5120103359222412
train_batch_loss[ 11660 ]:  1.749787449836731
train_batch_loss[ 11670 ]:  2.360459566116333
train_batch_loss[ 11680 ]:  2.399797201156616
train_batch_loss[ 11690 ]:  2.383331537246704
train_batch_loss[ 11700 ]:  2.1742231845855713
train_batch_loss[ 11710 ]:  2.057023048400879
train_batch_loss[ 11720 ]:  2.278972864151001
train_batch_loss[ 11730 ]:  2.1704888343811035
train_batch_loss[ 11740 ]:  2.2267537117004395
train_batch_loss[ 11750 ]:  3.1662216186523438
train_batch_loss[ 11760 ]:  

Evaluate:  24%|████████████████▍                                                    | 373/1568 [03:59<13:01,  1.53it/s]

Corrupted image for 467544


Evaluate:  40%|███████████████████████████▌                                         | 627/1568 [06:52<12:07,  1.29it/s]

Corrupted image for 620308


Evaluate:  43%|█████████████████████████████▍                                       | 669/1568 [07:20<09:56,  1.51it/s]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [17:24<00:00,  1.50it/s]


valid_evaluation: loss=2.1106627287037565, acc=0.749344116481923
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/crnn_012000_loss2.1106627287037565.pt
train_batch_loss[ 12010 ]:  1.640192985534668
train_batch_loss[ 12020 ]:  2.635739326477051
train_batch_loss[ 12030 ]:  2.640399694442749
train_batch_loss[ 12040 ]:  2.3996975421905518
train_batch_loss[ 12050 ]:  1.1350815296173096
train_batch_loss[ 12060 ]:  2.1631827354431152
train_batch_loss[ 12070 ]:  1.7667219638824463
train_batch_loss[ 12080 ]:  2.462158679962158
train_batch_loss[ 12090 ]:  2.434093475341797
train_batch_loss[ 12100 ]:  1.1621108055114746
train_batch_loss[ 12110 ]:  1.2897239923477173
train_batch_loss[ 12120 ]:  1.4582241773605347
train_batch_loss[ 12130 ]:  2.0874576568603516
train_batch_loss[ 12140 ]:  1.267080545425415
train_batch_loss[ 12150 ]:  3.227618932723999
train_batch_loss[ 12160 ]:  2.1352200508117676
train_batch_loss[ 12170 ]:  1.8626046180725098
train_batch_loss[ 12180 ]:  1.35764944553

train_batch_loss[ 13740 ]:  2.5217766761779785
train_batch_loss[ 13750 ]:  2.3204028606414795
train_batch_loss[ 13760 ]:  1.9612303972244263
train_batch_loss[ 13770 ]:  2.8303914070129395
train_batch_loss[ 13780 ]:  3.6471338272094727
train_batch_loss[ 13790 ]:  2.6561763286590576
train_batch_loss[ 13800 ]:  1.0827395915985107
train_batch_loss[ 13810 ]:  3.5969042778015137
train_batch_loss[ 13820 ]:  1.2004897594451904
train_batch_loss[ 13830 ]:  1.5016883611679077
train_batch_loss[ 13840 ]:  0.9046741724014282
train_batch_loss[ 13850 ]:  1.0495789051055908
train_batch_loss[ 13860 ]:  2.252368927001953
train_batch_loss[ 13870 ]:  3.0904417037963867
train_batch_loss[ 13880 ]:  2.253667116165161
train_batch_loss[ 13890 ]:  1.7398924827575684
train_batch_loss[ 13900 ]:  3.5218796730041504
train_batch_loss[ 13910 ]:  1.7166627645492554
train_batch_loss[ 13920 ]:  1.5213220119476318
train_batch_loss[ 13930 ]:  1.4078761339187622
train_batch_loss[ 13940 ]:  1.9262062311172485
train_batch_los

train_batch_loss[ 15500 ]:  2.0919189453125
train_batch_loss[ 15510 ]:  1.5334502458572388
train_batch_loss[ 15520 ]:  1.8956589698791504
train_batch_loss[ 15530 ]:  1.5814287662506104
train_batch_loss[ 15540 ]:  1.0194823741912842
train_batch_loss[ 15550 ]:  2.2578697204589844
train_batch_loss[ 15560 ]:  2.387580394744873
train_batch_loss[ 15570 ]:  1.7587038278579712
train_batch_loss[ 15580 ]:  1.6855106353759766
train_batch_loss[ 15590 ]:  0.8524782657623291
train_batch_loss[ 15600 ]:  1.9975614547729492
train_batch_loss[ 15610 ]:  1.6730976104736328
train_batch_loss[ 15620 ]:  3.031749725341797
train_batch_loss[ 15630 ]:  3.2178359031677246
train_batch_loss[ 15640 ]:  1.092455506324768
train_batch_loss[ 15650 ]:  2.7068610191345215
train_batch_loss[ 15660 ]:  3.010481834411621
train_batch_loss[ 15670 ]:  1.3949495553970337
train_batch_loss[ 15680 ]:  1.8687348365783691
train_batch_loss[ 15690 ]:  2.053699016571045
train_batch_loss[ 15700 ]:  1.8908313512802124
train_batch_loss[ 157

Evaluate:  44%|██████████████████████████████▋                                      | 696/1568 [07:51<09:28,  1.53it/s]

Corrupted image for 419552


Evaluate:  52%|███████████████████████████████████▉                                 | 817/1568 [09:16<08:56,  1.40it/s]

Corrupted image for 620308


Evaluate:  90%|████████████████████████████████████████████████████████████▉       | 1404/1568 [16:08<01:57,  1.40it/s]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [18:03<00:00,  1.45it/s]


valid_evaluation: loss=1.9079182894806852, acc=0.7750113985454709
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/crnn_016000_loss1.9079182894806852.pt
train_batch_loss[ 16010 ]:  1.8135981559753418
train_batch_loss[ 16020 ]:  2.066965103149414
train_batch_loss[ 16030 ]:  2.5759098529815674
train_batch_loss[ 16040 ]:  1.3181965351104736
train_batch_loss[ 16050 ]:  1.6125725507736206
train_batch_loss[ 16060 ]:  2.7425026893615723
train_batch_loss[ 16070 ]:  1.4408233165740967
train_batch_loss[ 16080 ]:  1.0777442455291748
train_batch_loss[ 16090 ]:  2.685962438583374
train_batch_loss[ 16100 ]:  2.268131732940674
train_batch_loss[ 16110 ]:  1.614823579788208
train_batch_loss[ 16120 ]:  2.134455680847168
train_batch_loss[ 16130 ]:  2.261246681213379
train_batch_loss[ 16140 ]:  2.3869757652282715
train_batch_loss[ 16150 ]:  2.1598384380340576
train_batch_loss[ 16160 ]:  1.01389741897583
train_batch_loss[ 16170 ]:  1.9268748760223389
train_batch_loss[ 16180 ]:  1.61244034767

train_batch_loss[ 17740 ]:  1.387070655822754
train_batch_loss[ 17750 ]:  1.945575475692749
train_batch_loss[ 17760 ]:  1.878149151802063
train_batch_loss[ 17770 ]:  1.668943166732788
Corrupted image for 6396080
train_batch_loss[ 17780 ]:  1.5024263858795166
train_batch_loss[ 17790 ]:  0.7900297045707703
train_batch_loss[ 17800 ]:  2.089132785797119
train_batch_loss[ 17810 ]:  1.1878429651260376
train_batch_loss[ 17820 ]:  1.103269338607788
train_batch_loss[ 17830 ]:  1.874809741973877
train_batch_loss[ 17840 ]:  1.7664456367492676
train_batch_loss[ 17850 ]:  1.589345097541809
train_batch_loss[ 17860 ]:  0.6606491804122925
train_batch_loss[ 17870 ]:  1.1941521167755127
train_batch_loss[ 17880 ]:  1.0787376165390015
train_batch_loss[ 17890 ]:  1.7326042652130127
train_batch_loss[ 17900 ]:  1.4628748893737793
train_batch_loss[ 17910 ]:  1.1491789817810059
train_batch_loss[ 17920 ]:  0.9135401248931885
train_batch_loss[ 17930 ]:  1.547655701637268
train_batch_loss[ 17940 ]:  1.56741857528

train_batch_loss[ 19480 ]:  1.0236942768096924
train_batch_loss[ 19490 ]:  2.5944786071777344
train_batch_loss[ 19500 ]:  2.157829761505127
train_batch_loss[ 19510 ]:  2.9681620597839355
train_batch_loss[ 19520 ]:  2.2226994037628174
train_batch_loss[ 19530 ]:  1.9156720638275146
train_batch_loss[ 19540 ]:  1.837035894393921
train_batch_loss[ 19550 ]:  1.750307321548462
train_batch_loss[ 19560 ]:  2.075000762939453
train_batch_loss[ 19570 ]:  0.9149093627929688
train_batch_loss[ 19580 ]:  1.4819141626358032
train_batch_loss[ 19590 ]:  1.460514783859253
train_batch_loss[ 19600 ]:  1.5696741342544556
train_batch_loss[ 19610 ]:  1.3839225769042969
train_batch_loss[ 19620 ]:  1.7071735858917236
train_batch_loss[ 19630 ]:  1.9206695556640625
train_batch_loss[ 19640 ]:  1.0643889904022217
train_batch_loss[ 19650 ]:  1.0574578046798706
train_batch_loss[ 19660 ]:  1.559185266494751
train_batch_loss[ 19670 ]:  1.7642638683319092
train_batch_loss[ 19680 ]:  2.738027572631836
train_batch_loss[ 19

Evaluate:   1%|▋                                                                     | 15/1568 [00:10<16:45,  1.54it/s]

Corrupted image for 467544


Evaluate:  56%|██████████████████████████████████████▊                              | 883/1568 [09:59<07:49,  1.46it/s]

Corrupted image for 620308


Evaluate:  80%|██████████████████████████████████████████████████████▎             | 1253/1568 [14:08<03:32,  1.48it/s]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [17:39<00:00,  1.48it/s]


valid_evaluation: loss=1.7631605052355646, acc=0.7895928165494422
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/crnn_020000_loss1.7631605052355646.pt
train_batch_loss[ 20010 ]:  2.0233161449432373
train_batch_loss[ 20020 ]:  1.4183098077774048
train_batch_loss[ 20030 ]:  1.6829068660736084
train_batch_loss[ 20040 ]:  2.288858652114868
train_batch_loss[ 20050 ]:  2.3236818313598633
train_batch_loss[ 20060 ]:  0.8894456028938293
train_batch_loss[ 20070 ]:  2.2404398918151855
train_batch_loss[ 20080 ]:  1.3993701934814453
train_batch_loss[ 20090 ]:  1.417893409729004
train_batch_loss[ 20100 ]:  1.5870329141616821
train_batch_loss[ 20110 ]:  1.6594579219818115
train_batch_loss[ 20120 ]:  0.9433706998825073
train_batch_loss[ 20130 ]:  2.188706159591675
train_batch_loss[ 20140 ]:  2.01784610748291
train_batch_loss[ 20150 ]:  1.6353119611740112
train_batch_loss[ 20160 ]:  2.003936767578125
train_batch_loss[ 20170 ]:  2.543461322784424
train_batch_loss[ 20180 ]:  2.6566591262

train_batch_loss[ 21730 ]:  2.2728075981140137
train_batch_loss[ 21740 ]:  2.2431464195251465
train_batch_loss[ 21750 ]:  1.1990712881088257
train_batch_loss[ 21760 ]:  1.1049559116363525
train_batch_loss[ 21770 ]:  1.5148577690124512
train_batch_loss[ 21780 ]:  1.1626126766204834
train_batch_loss[ 21790 ]:  2.2236170768737793
train_batch_loss[ 21800 ]:  1.1375908851623535
train_batch_loss[ 21810 ]:  1.3595696687698364
train_batch_loss[ 21820 ]:  2.0562245845794678
train_batch_loss[ 21830 ]:  1.5222439765930176
train_batch_loss[ 21840 ]:  1.8895976543426514
train_batch_loss[ 21850 ]:  1.7264063358306885
train_batch_loss[ 21860 ]:  1.1336033344268799
train_batch_loss[ 21870 ]:  2.85817813873291
train_batch_loss[ 21880 ]:  1.1210005283355713
train_batch_loss[ 21890 ]:  2.658095359802246
train_batch_loss[ 21900 ]:  0.5299156904220581
train_batch_loss[ 21910 ]:  1.401865839958191
train_batch_loss[ 21920 ]:  2.2655229568481445
train_batch_loss[ 21930 ]:  0.7871014475822449
train_batch_loss[

train_batch_loss[ 23490 ]:  1.6607675552368164
train_batch_loss[ 23500 ]:  1.073465347290039
train_batch_loss[ 23510 ]:  1.1425952911376953
train_batch_loss[ 23520 ]:  1.9688141345977783
train_batch_loss[ 23530 ]:  1.571183681488037
train_batch_loss[ 23540 ]:  2.1070122718811035
train_batch_loss[ 23550 ]:  0.9546140432357788
train_batch_loss[ 23560 ]:  1.0294232368469238
train_batch_loss[ 23570 ]:  1.8891382217407227
train_batch_loss[ 23580 ]:  2.0255017280578613
train_batch_loss[ 23590 ]:  2.1053733825683594
train_batch_loss[ 23600 ]:  1.939706802368164
train_batch_loss[ 23610 ]:  1.0918235778808594
train_batch_loss[ 23620 ]:  1.0598037242889404
train_batch_loss[ 23630 ]:  1.7056186199188232
train_batch_loss[ 23640 ]:  1.3583307266235352
train_batch_loss[ 23650 ]:  1.5137107372283936
train_batch_loss[ 23660 ]:  1.6611634492874146
train_batch_loss[ 23670 ]:  2.3935706615448
train_batch_loss[ 23680 ]:  1.3832966089248657
train_batch_loss[ 23690 ]:  2.31862735748291
train_batch_loss[ 237

Evaluate:  20%|█████████████▊                                                       | 315/1568 [03:31<15:53,  1.31it/s]

Corrupted image for 419552


Evaluate:  25%|█████████████████▌                                                   | 399/1568 [04:28<13:17,  1.47it/s]

Corrupted image for 620308


Evaluate:  50%|██████████████████████████████████▎                                  | 780/1568 [08:51<09:30,  1.38it/s]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [18:02<00:00,  1.45it/s]


valid_evaluation: loss=1.655817328797268, acc=0.8002052983927428
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/crnn_024000_loss1.655817328797268.pt
train_batch_loss[ 24010 ]:  1.3958313465118408
train_batch_loss[ 24020 ]:  1.817378282546997
train_batch_loss[ 24030 ]:  2.2151174545288086
train_batch_loss[ 24040 ]:  1.5607035160064697
train_batch_loss[ 24050 ]:  1.538818597793579
train_batch_loss[ 24060 ]:  1.8371937274932861
train_batch_loss[ 24070 ]:  1.2800395488739014
train_batch_loss[ 24080 ]:  1.8179383277893066
train_batch_loss[ 24090 ]:  2.088531494140625
train_batch_loss[ 24100 ]:  0.8652429580688477
train_batch_loss[ 24110 ]:  1.7742977142333984
train_batch_loss[ 24120 ]:  2.10654878616333
train_batch_loss[ 24130 ]:  2.072874069213867
train_batch_loss[ 24140 ]:  1.5364816188812256
train_batch_loss[ 24150 ]:  1.9600273370742798
train_batch_loss[ 24160 ]:  1.419754981994629
train_batch_loss[ 24170 ]:  2.284472942352295
train_batch_loss[ 24180 ]:  1.2634505033493

train_batch_loss[ 25720 ]:  1.7432711124420166
train_batch_loss[ 25730 ]:  1.8138787746429443
train_batch_loss[ 25740 ]:  0.9850970506668091
train_batch_loss[ 25750 ]:  1.5337132215499878
train_batch_loss[ 25760 ]:  1.9022245407104492
train_batch_loss[ 25770 ]:  2.3187999725341797
train_batch_loss[ 25780 ]:  1.7814974784851074
train_batch_loss[ 25790 ]:  1.7998977899551392
train_batch_loss[ 25800 ]:  0.7799630165100098
train_batch_loss[ 25810 ]:  1.4139633178710938
train_batch_loss[ 25820 ]:  1.4290497303009033
train_batch_loss[ 25830 ]:  1.293107271194458
train_batch_loss[ 25840 ]:  0.8496479392051697
train_batch_loss[ 25850 ]:  1.675426721572876
train_batch_loss[ 25860 ]:  2.206493854522705
train_batch_loss[ 25870 ]:  0.9504438638687134
train_batch_loss[ 25880 ]:  0.6356498003005981
train_batch_loss[ 25890 ]:  0.8577628135681152
train_batch_loss[ 25900 ]:  0.7627681493759155
train_batch_loss[ 25910 ]:  1.975507378578186
train_batch_loss[ 25920 ]:  1.9065845012664795
train_batch_loss[

train_batch_loss[ 27480 ]:  1.1449192762374878
train_batch_loss[ 27490 ]:  2.1994733810424805
train_batch_loss[ 27500 ]:  1.908794641494751
train_batch_loss[ 27510 ]:  1.5690484046936035
train_batch_loss[ 27520 ]:  1.7071115970611572
train_batch_loss[ 27530 ]:  2.2422738075256348
train_batch_loss[ 27540 ]:  2.7230896949768066
train_batch_loss[ 27550 ]:  1.5130813121795654
train_batch_loss[ 27560 ]:  1.158400535583496
train_batch_loss[ 27570 ]:  1.4783732891082764
train_batch_loss[ 27580 ]:  2.398667573928833
train_batch_loss[ 27590 ]:  1.002725601196289
train_batch_loss[ 27600 ]:  1.1897287368774414
train_batch_loss[ 27610 ]:  1.8142156600952148
train_batch_loss[ 27620 ]:  1.0338104963302612
train_batch_loss[ 27630 ]:  1.3387469053268433
train_batch_loss[ 27640 ]:  2.492715835571289
train_batch_loss[ 27650 ]:  1.0938047170639038
train_batch_loss[ 27660 ]:  2.0843451023101807
train_batch_loss[ 27670 ]:  0.939992368221283
train_batch_loss[ 27680 ]:  1.5574517250061035
train_batch_loss[ 2

Evaluate:  50%|██████████████████████████████████▎                                  | 781/1568 [08:52<08:35,  1.53it/s]

Corrupted image for 620308


Evaluate:  71%|████████████████████████████████████████████████▌                   | 1121/1568 [12:44<05:16,  1.41it/s]

Corrupted image for 467544


Evaluate: 100%|███████████████████████████████████████████████████████████████████▉| 1567/1568 [17:52<00:00,  1.47it/s]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [17:52<00:00,  1.46it/s]


valid_evaluation: loss=1.5554297052585242, acc=0.8138860444431156
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/crnn_028000_loss1.5554297052585242.pt
train_batch_loss[ 28010 ]:  1.2331300973892212
train_batch_loss[ 28020 ]:  1.074973464012146
train_batch_loss[ 28030 ]:  1.0991559028625488
train_batch_loss[ 28040 ]:  1.3657946586608887
train_batch_loss[ 28050 ]:  0.9851486086845398
train_batch_loss[ 28060 ]:  0.9207973480224609
train_batch_loss[ 28070 ]:  1.2090961933135986
train_batch_loss[ 28080 ]:  1.2511308193206787
train_batch_loss[ 28090 ]:  1.4025483131408691
train_batch_loss[ 28100 ]:  1.3021833896636963
train_batch_loss[ 28110 ]:  1.2387306690216064
train_batch_loss[ 28120 ]:  1.3822128772735596
train_batch_loss[ 28130 ]:  2.5151381492614746
train_batch_loss[ 28140 ]:  1.4592891931533813
train_batch_loss[ 28150 ]:  0.9528502225875854
train_batch_loss[ 28160 ]:  0.7863971590995789
train_batch_loss[ 28170 ]:  1.8215724229812622
train_batch_loss[ 28180 ]:  0.5480

train_batch_loss[ 29730 ]:  1.5973984003067017
train_batch_loss[ 29740 ]:  0.9175925254821777
train_batch_loss[ 29750 ]:  1.3114864826202393
train_batch_loss[ 29760 ]:  1.1638786792755127
train_batch_loss[ 29770 ]:  1.5179240703582764
train_batch_loss[ 29780 ]:  1.38776433467865
train_batch_loss[ 29790 ]:  1.4266867637634277
train_batch_loss[ 29800 ]:  1.2404148578643799
train_batch_loss[ 29810 ]:  1.3983339071273804
train_batch_loss[ 29820 ]:  1.012574553489685
train_batch_loss[ 29830 ]:  0.8766895532608032
train_batch_loss[ 29840 ]:  1.7045965194702148
train_batch_loss[ 29850 ]:  0.8705460429191589
train_batch_loss[ 29860 ]:  1.3588591814041138
train_batch_loss[ 29870 ]:  2.2940280437469482
train_batch_loss[ 29880 ]:  2.666059970855713
train_batch_loss[ 29890 ]:  1.5116698741912842
train_batch_loss[ 29900 ]:  1.0377109050750732
train_batch_loss[ 29910 ]:  1.670417308807373
Corrupted image for 1619592
train_batch_loss[ 29920 ]:  0.8114583492279053
Corrupted image for 1193907
train_bat

train_batch_loss[ 31470 ]:  1.5751243829727173
train_batch_loss[ 31480 ]:  0.9034552574157715
train_batch_loss[ 31490 ]:  1.5908699035644531
train_batch_loss[ 31500 ]:  1.4111518859863281
train_batch_loss[ 31510 ]:  1.8369064331054688
train_batch_loss[ 31520 ]:  1.9238109588623047
train_batch_loss[ 31530 ]:  2.524867534637451
train_batch_loss[ 31540 ]:  1.0070056915283203
train_batch_loss[ 31550 ]:  1.2444448471069336
train_batch_loss[ 31560 ]:  1.4930124282836914
train_batch_loss[ 31570 ]:  1.6676911115646362
train_batch_loss[ 31580 ]:  1.4338860511779785
train_batch_loss[ 31590 ]:  1.7471320629119873
train_batch_loss[ 31600 ]:  0.5740116834640503
train_batch_loss[ 31610 ]:  1.8707863092422485
train_batch_loss[ 31620 ]:  1.9971632957458496
train_batch_loss[ 31630 ]:  1.1558418273925781
train_batch_loss[ 31640 ]:  1.2386412620544434
train_batch_loss[ 31650 ]:  1.8169149160385132
Corrupted image for 2171525
train_batch_loss[ 31660 ]:  1.1340835094451904
train_batch_loss[ 31670 ]:  1.109

Evaluate:  22%|███████████████                                                      | 343/1568 [03:50<13:39,  1.50it/s]

Corrupted image for 467544


Evaluate:  51%|███████████████████████████████████                                  | 797/1568 [09:00<08:47,  1.46it/s]

Corrupted image for 419552


Evaluate:  75%|██████████████████████████████████████████████████▉                 | 1174/1568 [13:22<04:26,  1.48it/s]

Corrupted image for 620308


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [17:54<00:00,  1.46it/s]


valid_evaluation: loss=1.5331961985217035, acc=0.8174924694855332
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/crnn_032000_loss1.5331961985217035.pt
train_batch_loss[ 32010 ]:  0.6943706274032593
train_batch_loss[ 32020 ]:  1.356973648071289
train_batch_loss[ 32030 ]:  1.1720134019851685
train_batch_loss[ 32040 ]:  1.9574649333953857
train_batch_loss[ 32050 ]:  1.9381215572357178
train_batch_loss[ 32060 ]:  1.3604902029037476
train_batch_loss[ 32070 ]:  1.4709051847457886
train_batch_loss[ 32080 ]:  1.7946255207061768
train_batch_loss[ 32090 ]:  2.2964367866516113
train_batch_loss[ 32100 ]:  0.867591917514801
train_batch_loss[ 32110 ]:  2.0983569622039795
train_batch_loss[ 32120 ]:  1.2734662294387817
train_batch_loss[ 32130 ]:  1.5944957733154297
train_batch_loss[ 32140 ]:  1.5943137407302856
train_batch_loss[ 32150 ]:  1.189049243927002
train_batch_loss[ 32160 ]:  1.1433897018432617
train_batch_loss[ 32170 ]:  1.2892405986785889
train_batch_loss[ 32180 ]:  2.128203

train_batch_loss[ 33730 ]:  2.1603078842163086
train_batch_loss[ 33740 ]:  1.7787933349609375
train_batch_loss[ 33750 ]:  1.7795319557189941
train_batch_loss[ 33760 ]:  1.278836965560913
train_batch_loss[ 33770 ]:  1.7246739864349365
train_batch_loss[ 33780 ]:  1.0709530115127563
train_batch_loss[ 33790 ]:  1.1198676824569702
train_batch_loss[ 33800 ]:  1.1853673458099365
train_batch_loss[ 33810 ]:  1.2287620306015015
train_batch_loss[ 33820 ]:  0.7437242865562439
train_batch_loss[ 33830 ]:  1.346562147140503
train_batch_loss[ 33840 ]:  1.003336787223816
train_batch_loss[ 33850 ]:  1.8631569147109985
train_batch_loss[ 33860 ]:  2.17632794380188
train_batch_loss[ 33870 ]:  0.9260302782058716
train_batch_loss[ 33880 ]:  1.612938404083252
train_batch_loss[ 33890 ]:  1.5144647359848022
train_batch_loss[ 33900 ]:  1.616049885749817
train_batch_loss[ 33910 ]:  1.5532020330429077
train_batch_loss[ 33920 ]:  1.9298447370529175
train_batch_loss[ 33930 ]:  2.5257976055145264
train_batch_loss[ 33

train_batch_loss[ 35480 ]:  0.49087807536125183
train_batch_loss[ 35490 ]:  1.1558079719543457
train_batch_loss[ 35500 ]:  1.7086894512176514
train_batch_loss[ 35510 ]:  0.6766329407691956
train_batch_loss[ 35520 ]:  1.6852831840515137
train_batch_loss[ 35530 ]:  1.9048548936843872
train_batch_loss[ 35540 ]:  1.3493270874023438
train_batch_loss[ 35550 ]:  2.8429198265075684
train_batch_loss[ 35560 ]:  1.8540095090866089
train_batch_loss[ 35570 ]:  2.6885581016540527
train_batch_loss[ 35580 ]:  1.9812572002410889
train_batch_loss[ 35590 ]:  1.361724853515625
train_batch_loss[ 35600 ]:  1.4240734577178955
train_batch_loss[ 35610 ]:  0.6569034457206726
train_batch_loss[ 35620 ]:  1.4525190591812134
train_batch_loss[ 35630 ]:  1.7816243171691895
train_batch_loss[ 35640 ]:  1.121826410293579
train_batch_loss[ 35650 ]:  1.7364485263824463
train_batch_loss[ 35660 ]:  1.5625810623168945
train_batch_loss[ 35670 ]:  1.4176589250564575
train_batch_loss[ 35680 ]:  0.9542480707168579
train_batch_lo

Evaluate:  16%|██████████▉                                                          | 248/1568 [02:46<15:04,  1.46it/s]

Corrupted image for 620308


Evaluate:  26%|██████████████████▎                                                  | 415/1568 [04:41<13:47,  1.39it/s]

Corrupted image for 467544


Evaluate:  52%|███████████████████████████████████▉                                 | 816/1568 [09:13<08:25,  1.49it/s]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [17:51<00:00,  1.46it/s]


valid_evaluation: loss=1.4414411052854434, acc=0.8262948872229157
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/crnn_036000_loss1.4414411052854434.pt
train_batch_loss[ 36010 ]:  2.3450682163238525
train_batch_loss[ 36020 ]:  1.6438729763031006
train_batch_loss[ 36030 ]:  1.0437228679656982
train_batch_loss[ 36040 ]:  0.5783851146697998
train_batch_loss[ 36050 ]:  1.7447563409805298
train_batch_loss[ 36060 ]:  0.4097437262535095
train_batch_loss[ 36070 ]:  1.5727643966674805
train_batch_loss[ 36080 ]:  3.2746832370758057
train_batch_loss[ 36090 ]:  1.5111110210418701
train_batch_loss[ 36100 ]:  3.4426681995391846
train_batch_loss[ 36110 ]:  1.0566531419754028
train_batch_loss[ 36120 ]:  1.793357253074646
train_batch_loss[ 36130 ]:  0.7120516300201416
train_batch_loss[ 36140 ]:  1.0289983749389648
train_batch_loss[ 36150 ]:  1.357550859451294
train_batch_loss[ 36160 ]:  1.5241789817810059
train_batch_loss[ 36170 ]:  1.1608738899230957
train_batch_loss[ 36180 ]:  1.41014

train_batch_loss[ 37730 ]:  1.514281988143921
train_batch_loss[ 37740 ]:  0.8464874029159546
train_batch_loss[ 37750 ]:  0.29785895347595215
train_batch_loss[ 37760 ]:  1.6179436445236206
train_batch_loss[ 37770 ]:  3.717864990234375
train_batch_loss[ 37780 ]:  1.3022093772888184
train_batch_loss[ 37790 ]:  1.7325034141540527
train_batch_loss[ 37800 ]:  1.222071647644043
train_batch_loss[ 37810 ]:  2.1026039123535156
train_batch_loss[ 37820 ]:  2.526791572570801
train_batch_loss[ 37830 ]:  1.4100123643875122
train_batch_loss[ 37840 ]:  0.5826021432876587
train_batch_loss[ 37850 ]:  1.157867670059204
train_batch_loss[ 37860 ]:  0.6811391115188599
train_batch_loss[ 37870 ]:  1.4073662757873535
train_batch_loss[ 37880 ]:  0.9758453965187073
train_batch_loss[ 37890 ]:  1.131414532661438
train_batch_loss[ 37900 ]:  1.9255213737487793
train_batch_loss[ 37910 ]:  0.8895235657691956
train_batch_loss[ 37920 ]:  1.5340211391448975
train_batch_loss[ 37930 ]:  2.0412979125976562
train_batch_loss[ 

train_batch_loss[ 39490 ]:  0.7688331604003906
train_batch_loss[ 39500 ]:  1.7337828874588013
train_batch_loss[ 39510 ]:  1.2703081369400024
train_batch_loss[ 39520 ]:  1.344128966331482
train_batch_loss[ 39530 ]:  1.5409815311431885
train_batch_loss[ 39540 ]:  1.1875537633895874
train_batch_loss[ 39550 ]:  0.82716965675354
train_batch_loss[ 39560 ]:  1.2480049133300781
train_batch_loss[ 39570 ]:  1.2649034261703491
train_batch_loss[ 39580 ]:  0.7559416890144348
train_batch_loss[ 39590 ]:  2.325800895690918
train_batch_loss[ 39600 ]:  1.4733095169067383
train_batch_loss[ 39610 ]:  1.2949596643447876
train_batch_loss[ 39620 ]:  2.2717971801757812
train_batch_loss[ 39630 ]:  1.138403058052063
train_batch_loss[ 39640 ]:  0.5902848243713379
train_batch_loss[ 39650 ]:  1.3907852172851562
train_batch_loss[ 39660 ]:  0.8418992757797241
train_batch_loss[ 39670 ]:  1.1630094051361084
train_batch_loss[ 39680 ]:  1.558678388595581
train_batch_loss[ 39690 ]:  1.54398775100708
train_batch_loss[ 397

Evaluate:  10%|██████▊                                                              | 156/1568 [01:44<15:26,  1.52it/s]

Corrupted image for 620308


Evaluate:  30%|████████████████████▉                                                | 476/1568 [05:24<12:27,  1.46it/s]

Corrupted image for 419552


Evaluate:  71%|████████████████████████████████████████████████▏                   | 1110/1568 [12:38<05:12,  1.46it/s]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [17:57<00:00,  1.46it/s]

valid_evaluation: loss=1.3993982520835508, acc=0.8302139936766102
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/crnn_040000_loss1.3993982520835508.pt
train_loss:  2.7786078856851906





In [5]:

import os
import pickle

histories = [loss_history,valid_history]
with open('D:/lab2/深度学习课程设计/crnn-pytorch-master/train_history/crnn_1', 'wb') as fp:
    pickle.dump(histories, fp)

In [6]:
with open ('D:/lab2/深度学习课程设计/crnn-pytorch-master/train_history/crnn_1', 'rb') as fp:
    temp = pickle.load(fp)
temp

[[28.225263595581055,
  26.03909683227539,
  26.25438690185547,
  24.54315757751465,
  24.86879539489746,
  25.75336456298828,
  25.201152801513672,
  24.07999038696289,
  24.856430053710938,
  25.97860336303711,
  24.96133804321289,
  25.163803100585938,
  25.19082260131836,
  25.744182586669922,
  23.48008918762207,
  25.617610931396484,
  24.29474639892578,
  24.26898956298828,
  24.93378448486328,
  23.547264099121094,
  24.399791717529297,
  24.572406768798828,
  23.88490104675293,
  25.667999267578125,
  25.35882568359375,
  25.277908325195312,
  25.746557235717773,
  22.447723388671875,
  23.455978393554688,
  22.873191833496094,
  25.201431274414062,
  24.343748092651367,
  24.091259002685547,
  24.95233154296875,
  25.224937438964844,
  25.846519470214844,
  25.47356414794922,
  23.613039016723633,
  23.92202377319336,
  24.313255310058594,
  24.96070098876953,
  23.272903442382812,
  23.322437286376953,
  25.530223846435547,
  24.166048049926758,
  23.35240936279297,
  24.282

In [7]:
pytorch_total_params = sum(p.numel() for p in crnn.parameters())
pytorch_total_params

7839077

# mobilevit-RNN 

In [10]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import timm

class MobileVit_improv(nn.Module):
    def __init__(self,img_channel,  img_height, img_width,num_class,rnn_hidden=256):
        super(MobileVit_improv, self).__init__()
        self.model = timm.create_model('mobilevit_s.cvnets_in1k', pretrained=False,features_only=True)
        self.model.stem.conv = nn.Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        
        map_to_seq_hidden = 1000
        #self.model.head.fc = nn.Linear(self.model.head.in_features, map_to_seq)
        output_channel, output_height, output_width = self._compute_output_shape(img_channel, img_height, img_width)
        
        self.map_to_seq = nn.Linear(output_channel * output_height, map_to_seq_hidden)
        self.rnn1 = nn.LSTM(map_to_seq_hidden, rnn_hidden, bidirectional=True)
        self.rnn2 = nn.LSTM(2 * rnn_hidden, rnn_hidden, bidirectional=True)

        self.dense = nn.Linear(2 * rnn_hidden, num_class)
        
    
    
    def forward(self, x):
        x = self.model(x)[2]
        #print(x)
        batch, channel, height, width = x.size()
        #print('batch, channel, height, width')
        #print(batch, channel, height, width)
        x = x.view(batch, channel * height, width)
        x = x.permute(2, 0, 1)  # (width, batch, feature)
        #print(x.size())
        seq = self.map_to_seq(x)

        recurrent, _ = self.rnn1(seq)
        recurrent, _ = self.rnn2(recurrent)

        output = self.dense(recurrent)
        return output  # shape: (seq_len, batch, num_class)
        

    def _compute_output_shape(self,img_channel,  img_height, img_width):
        dummy_input = torch.randn(1, img_channel, img_height, img_width)
        dummy_output = self.model(dummy_input)
        #print(len(dummy_output))
        
        _, channels, height, width = dummy_output[2].size()
        #print('hihi')
        #print(channels, height, width)
        return channels, height, width

In [11]:
num_class = len(Synth90kDataset.LABEL2CHAR) + 1
model = MobileVit_improv(1,32,100,num_class)
#model.load_state_dict(torch.load('D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobileVit_chars74k_2.pth'),strict=False)
dummy_input = torch.randn(5, 1, 32, 100)
dummy_output =model(dummy_input)
dummy_output.size()

torch.Size([13, 5, 384])


torch.Size([13, 5, 37])

In [9]:
pytorch_total_params = sum(p.numel() for p in model.parameters())
pytorch_total_params

9494669

In [10]:

epochs = config['epochs']
train_batch_size = config['train_batch_size']
eval_batch_size = config['eval_batch_size']
lr = config['lr']
show_interval = config['show_interval']
valid_interval = config['valid_interval']
save_interval = config['save_interval']
end_interval = 40000
cpu_workers = config['cpu_workers']
reload_checkpoint = config['reload_checkpoint']
valid_max_iter = config['valid_max_iter']

img_width = config['img_width']
img_height = config['img_height']
img_channel = config['img_channel']
data_dir = config['data_dir']
#print(img_width,img_height)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

train_dataset = Synth90kDataset(root_dir=data_dir, mode='train',
                                img_height=img_height, img_width=img_width)
valid_dataset = Synth90kDataset(root_dir=data_dir, mode='dev',
                                img_height=img_height, img_width=img_width)

print(valid_dataset.__len__())
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=eval_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)

num_class = len(Synth90kDataset.LABEL2CHAR) + 1
'''
crnn = CRNN(1, img_height, img_width, num_class,
            map_to_seq_hidden=config['map_to_seq_hidden'],
            rnn_hidden=config['rnn_hidden'],
            leaky_relu=config['leaky_relu'])
if reload_checkpoint:
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
'''  
    
crnn = MobileVit_improv(1,32,100,num_class)
#crnn.load_state_dict(torch.load('D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobileVit_chars74k_2.pth'),strict=False)


crnn.to(device)

optimizer = optim.Adam(crnn.parameters())
criterion = CTCLoss(reduction='sum', zero_infinity=True)
criterion.to(device)
loss_history=[]
valid_history=[]
assert save_interval % valid_interval == 0
i = 1
for epoch in range(1, epochs + 1):
    print(f'epoch: {epoch}')
    tot_train_loss = 0.
    tot_train_count = 0
    for train_data in train_loader:
        loss = train_batch(crnn, train_data, optimizer, criterion, device)
        train_size = train_data[0].size(0)

        tot_train_loss += loss
        tot_train_count += train_size
        if i % show_interval == 0:
            print('train_batch_loss[', i, ']: ', loss / train_size)
            loss_history.append(loss / train_size)

        if i % valid_interval == 0:
            evaluation = evaluate(crnn, valid_loader, criterion,
                                  decode_method=config['decode_method'],
                                  beam_size=config['beam_size'])
            print('valid_evaluation: loss={loss}, acc={acc}'.format(**evaluation))
            valid_history.append(evaluation)
            if i % save_interval == 0:
                prefix = 'mobilevit_rnn'
                loss = evaluation['loss']
                save_model_path = os.path.join(config['checkpoints_dir'],
                                               f'{prefix}_{i:06}_loss{loss}.pt')
                torch.save(crnn.state_dict(), save_model_path)
                print('save model at ', save_model_path)

        if i % end_interval == 0:
            break
        i += 1

    print('train_loss: ', tot_train_loss / tot_train_count)

device: cuda
802734
epoch: 1
train_batch_loss[ 10 ]:  24.15996551513672
train_batch_loss[ 20 ]:  23.147846221923828
train_batch_loss[ 30 ]:  25.26816177368164
train_batch_loss[ 40 ]:  23.757020950317383
train_batch_loss[ 50 ]:  22.980716705322266
train_batch_loss[ 60 ]:  24.600717544555664
train_batch_loss[ 70 ]:  23.73973274230957
train_batch_loss[ 80 ]:  23.625492095947266
train_batch_loss[ 90 ]:  25.49618911743164
train_batch_loss[ 100 ]:  23.96426010131836
train_batch_loss[ 110 ]:  23.72730255126953
train_batch_loss[ 120 ]:  23.053531646728516
train_batch_loss[ 130 ]:  24.715412139892578
train_batch_loss[ 140 ]:  24.980304718017578
train_batch_loss[ 150 ]:  24.164417266845703
train_batch_loss[ 160 ]:  24.762134552001953
train_batch_loss[ 170 ]:  22.56833839416504
train_batch_loss[ 180 ]:  23.74498748779297
train_batch_loss[ 190 ]:  23.180675506591797
train_batch_loss[ 200 ]:  23.6967830657959
train_batch_loss[ 210 ]:  22.54412841796875
train_batch_loss[ 220 ]:  23.37295150756836
tr

train_batch_loss[ 1830 ]:  6.777268886566162
train_batch_loss[ 1840 ]:  5.645070552825928
train_batch_loss[ 1850 ]:  6.551827907562256
train_batch_loss[ 1860 ]:  8.61594009399414
train_batch_loss[ 1870 ]:  7.092624664306641
train_batch_loss[ 1880 ]:  5.739577293395996
train_batch_loss[ 1890 ]:  5.780702590942383
train_batch_loss[ 1900 ]:  5.878946781158447
train_batch_loss[ 1910 ]:  6.097238540649414
train_batch_loss[ 1920 ]:  6.918108940124512
train_batch_loss[ 1930 ]:  8.439742088317871
train_batch_loss[ 1940 ]:  6.271666526794434
train_batch_loss[ 1950 ]:  7.155298709869385
train_batch_loss[ 1960 ]:  6.4985575675964355
train_batch_loss[ 1970 ]:  7.308804512023926
train_batch_loss[ 1980 ]:  6.367013454437256
train_batch_loss[ 1990 ]:  4.788702011108398
train_batch_loss[ 2000 ]:  6.129495620727539
train_batch_loss[ 2010 ]:  5.942590713500977
train_batch_loss[ 2020 ]:  6.439490795135498
train_batch_loss[ 2030 ]:  7.721747875213623
train_batch_loss[ 2040 ]:  4.685502052307129
train_batc

train_batch_loss[ 3660 ]:  4.474054336547852
train_batch_loss[ 3670 ]:  3.400106430053711
train_batch_loss[ 3680 ]:  3.6643457412719727
train_batch_loss[ 3690 ]:  6.028110027313232
train_batch_loss[ 3700 ]:  5.409153938293457
train_batch_loss[ 3710 ]:  4.444601058959961
train_batch_loss[ 3720 ]:  5.254767417907715
train_batch_loss[ 3730 ]:  2.7112183570861816
train_batch_loss[ 3740 ]:  3.559403419494629
train_batch_loss[ 3750 ]:  4.480258464813232
train_batch_loss[ 3760 ]:  5.895580768585205
train_batch_loss[ 3770 ]:  3.7646400928497314
train_batch_loss[ 3780 ]:  5.0508012771606445
train_batch_loss[ 3790 ]:  4.66364860534668
train_batch_loss[ 3800 ]:  3.601590156555176
train_batch_loss[ 3810 ]:  4.4027605056762695
train_batch_loss[ 3820 ]:  5.053958415985107
train_batch_loss[ 3830 ]:  3.3217406272888184
train_batch_loss[ 3840 ]:  5.172924518585205
train_batch_loss[ 3850 ]:  4.680365562438965
train_batch_loss[ 3860 ]:  4.868142604827881
train_batch_loss[ 3870 ]:  5.086209774017334
train

Evaluate:   1%|█                                                                     | 23/1568 [00:17<19:02,  1.35it/s]

Corrupted image for 419552


Evaluate:  43%|█████████████████████████████▋                                       | 675/1568 [08:21<11:18,  1.32it/s]

Corrupted image for 620308


Evaluate:  98%|██████████████████████████████████████████████████████████████████▍ | 1533/1568 [19:18<00:27,  1.28it/s]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [19:45<00:00,  1.32it/s]


valid_evaluation: loss=4.386173766929108, acc=0.5138489213114182
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_004000_loss4.386173766929108.pt
train_batch_loss[ 4010 ]:  3.3605411052703857
train_batch_loss[ 4020 ]:  4.276453971862793
train_batch_loss[ 4030 ]:  3.543091297149658
train_batch_loss[ 4040 ]:  4.123657703399658
train_batch_loss[ 4050 ]:  3.367859363555908
train_batch_loss[ 4060 ]:  4.15635871887207
train_batch_loss[ 4070 ]:  5.016016006469727
train_batch_loss[ 4080 ]:  5.826628684997559
train_batch_loss[ 4090 ]:  5.960491180419922
train_batch_loss[ 4100 ]:  2.6540427207946777
train_batch_loss[ 4110 ]:  4.8044843673706055
train_batch_loss[ 4120 ]:  3.674482822418213
train_batch_loss[ 4130 ]:  3.9867453575134277
train_batch_loss[ 4140 ]:  2.7063183784484863
train_batch_loss[ 4150 ]:  3.722865104675293
train_batch_loss[ 4160 ]:  4.667121887207031
train_batch_loss[ 4170 ]:  5.455020427703857
train_batch_loss[ 4180 ]:  5.412019729614258
train_batch

train_batch_loss[ 5780 ]:  5.564684867858887
train_batch_loss[ 5790 ]:  4.564244270324707
train_batch_loss[ 5800 ]:  2.0545129776000977
train_batch_loss[ 5810 ]:  4.860746383666992
train_batch_loss[ 5820 ]:  2.809704065322876
train_batch_loss[ 5830 ]:  2.951230049133301
train_batch_loss[ 5840 ]:  4.239879608154297
train_batch_loss[ 5850 ]:  3.4913177490234375
train_batch_loss[ 5860 ]:  3.600365400314331
train_batch_loss[ 5870 ]:  4.146036148071289
train_batch_loss[ 5880 ]:  2.74113392829895
train_batch_loss[ 5890 ]:  4.017865180969238
train_batch_loss[ 5900 ]:  4.6050214767456055
train_batch_loss[ 5910 ]:  3.8105485439300537
train_batch_loss[ 5920 ]:  4.387855052947998
train_batch_loss[ 5930 ]:  4.004925727844238
train_batch_loss[ 5940 ]:  4.509212493896484
train_batch_loss[ 5950 ]:  4.781298637390137
train_batch_loss[ 5960 ]:  4.022619247436523
train_batch_loss[ 5970 ]:  2.8558359146118164
train_batch_loss[ 5980 ]:  3.52935791015625
train_batch_loss[ 5990 ]:  2.4359560012817383
train_

train_batch_loss[ 7590 ]:  2.8583264350891113
train_batch_loss[ 7600 ]:  2.799893379211426
train_batch_loss[ 7610 ]:  4.176643371582031
train_batch_loss[ 7620 ]:  4.089461326599121
train_batch_loss[ 7630 ]:  4.032027244567871
train_batch_loss[ 7640 ]:  2.660689353942871
train_batch_loss[ 7650 ]:  2.7143826484680176
train_batch_loss[ 7660 ]:  3.5761003494262695
train_batch_loss[ 7670 ]:  3.3309812545776367
train_batch_loss[ 7680 ]:  4.581599235534668
train_batch_loss[ 7690 ]:  2.1884989738464355
train_batch_loss[ 7700 ]:  3.900867462158203
train_batch_loss[ 7710 ]:  2.114635467529297
train_batch_loss[ 7720 ]:  3.964388370513916
train_batch_loss[ 7730 ]:  4.102213382720947
train_batch_loss[ 7740 ]:  2.7831733226776123
train_batch_loss[ 7750 ]:  4.156029224395752
train_batch_loss[ 7760 ]:  4.744307994842529
train_batch_loss[ 7770 ]:  3.0769901275634766
train_batch_loss[ 7780 ]:  3.013916015625
train_batch_loss[ 7790 ]:  4.65318489074707
train_batch_loss[ 7800 ]:  2.070864677429199
train_b

Evaluate:  52%|███████████████████████████████████▋                                 | 810/1568 [10:17<09:17,  1.36it/s]

Corrupted image for 419552


Evaluate:  69%|███████████████████████████████████████████████▏                    | 1089/1568 [13:47<05:44,  1.39it/s]

Corrupted image for 620308


Evaluate:  72%|████████████████████████████████████████████████▉                   | 1127/1568 [14:15<05:31,  1.33it/s]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [19:47<00:00,  1.32it/s]


valid_evaluation: loss=3.3962953778010037, acc=0.5946988666233148
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_008000_loss3.3962953778010037.pt
Corrupted image for 6067643
train_batch_loss[ 8010 ]:  4.024009704589844
train_batch_loss[ 8020 ]:  4.186408519744873
train_batch_loss[ 8030 ]:  2.7609617710113525
train_batch_loss[ 8040 ]:  4.294386863708496
train_batch_loss[ 8050 ]:  3.811673879623413
train_batch_loss[ 8060 ]:  2.5418574810028076
train_batch_loss[ 8070 ]:  4.031100273132324
train_batch_loss[ 8080 ]:  2.977506637573242
train_batch_loss[ 8090 ]:  4.071592330932617
train_batch_loss[ 8100 ]:  4.392199516296387
train_batch_loss[ 8110 ]:  3.46779727935791
train_batch_loss[ 8120 ]:  2.7329487800598145
train_batch_loss[ 8130 ]:  3.762125253677368
train_batch_loss[ 8140 ]:  2.744314670562744
train_batch_loss[ 8150 ]:  3.1948819160461426
train_batch_loss[ 8160 ]:  2.540170192718506
train_batch_loss[ 8170 ]:  3.347677230834961
train_batch_loss[ 8180 ]:  

train_batch_loss[ 9780 ]:  2.2659013271331787
train_batch_loss[ 9790 ]:  2.8676295280456543
train_batch_loss[ 9800 ]:  3.6717920303344727
train_batch_loss[ 9810 ]:  2.855881690979004
train_batch_loss[ 9820 ]:  2.676267147064209
train_batch_loss[ 9830 ]:  3.8697621822357178
train_batch_loss[ 9840 ]:  2.845757484436035
train_batch_loss[ 9850 ]:  3.3597302436828613
train_batch_loss[ 9860 ]:  2.4984874725341797
train_batch_loss[ 9870 ]:  3.0880913734436035
train_batch_loss[ 9880 ]:  2.0588293075561523
train_batch_loss[ 9890 ]:  2.8176498413085938
train_batch_loss[ 9900 ]:  3.575120210647583
train_batch_loss[ 9910 ]:  1.1748968362808228
train_batch_loss[ 9920 ]:  3.5393128395080566
train_batch_loss[ 9930 ]:  3.9009475708007812
train_batch_loss[ 9940 ]:  3.3099727630615234
train_batch_loss[ 9950 ]:  4.042572021484375
train_batch_loss[ 9960 ]:  2.771928310394287
train_batch_loss[ 9970 ]:  2.4640307426452637
train_batch_loss[ 9980 ]:  3.2011687755584717
train_batch_loss[ 9990 ]:  3.28926277160

train_batch_loss[ 11550 ]:  5.095932960510254
Corrupted image for 2072776
train_batch_loss[ 11560 ]:  4.013830661773682
train_batch_loss[ 11570 ]:  3.029609203338623
train_batch_loss[ 11580 ]:  3.2168545722961426
train_batch_loss[ 11590 ]:  2.6604318618774414
train_batch_loss[ 11600 ]:  2.3479795455932617
train_batch_loss[ 11610 ]:  2.6024680137634277
train_batch_loss[ 11620 ]:  3.1608457565307617
train_batch_loss[ 11630 ]:  2.443847894668579
train_batch_loss[ 11640 ]:  2.1782681941986084
train_batch_loss[ 11650 ]:  2.113478183746338
train_batch_loss[ 11660 ]:  3.6525216102600098
train_batch_loss[ 11670 ]:  2.3303472995758057
train_batch_loss[ 11680 ]:  3.1203486919403076
train_batch_loss[ 11690 ]:  3.5113449096679688
train_batch_loss[ 11700 ]:  2.8178799152374268
train_batch_loss[ 11710 ]:  3.870274066925049
train_batch_loss[ 11720 ]:  2.6214277744293213
train_batch_loss[ 11730 ]:  4.230501174926758
train_batch_loss[ 11740 ]:  3.3482022285461426
train_batch_loss[ 11750 ]:  3.928284645

Evaluate:  55%|██████████████████████████████████████                               | 866/1568 [10:50<09:24,  1.24it/s]

Corrupted image for 620308


Evaluate:  65%|████████████████████████████████████████████▍                       | 1024/1568 [12:50<06:38,  1.37it/s]

Corrupted image for 419552


Evaluate:  79%|█████████████████████████████████████████████████████▉              | 1243/1568 [15:38<04:14,  1.28it/s]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [19:43<00:00,  1.32it/s]


valid_evaluation: loss=2.99469157309801, acc=0.6328534732551505
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_012000_loss2.99469157309801.pt
train_batch_loss[ 12010 ]:  2.146674156188965
train_batch_loss[ 12020 ]:  2.3498787879943848
train_batch_loss[ 12030 ]:  3.3652114868164062
train_batch_loss[ 12040 ]:  3.873565196990967
train_batch_loss[ 12050 ]:  3.1133546829223633
train_batch_loss[ 12060 ]:  2.6603972911834717
train_batch_loss[ 12070 ]:  2.124276638031006
train_batch_loss[ 12080 ]:  2.367920398712158
train_batch_loss[ 12090 ]:  2.2565078735351562
train_batch_loss[ 12100 ]:  1.8780710697174072
train_batch_loss[ 12110 ]:  2.675384759902954
train_batch_loss[ 12120 ]:  3.2046878337860107
train_batch_loss[ 12130 ]:  2.282355785369873
train_batch_loss[ 12140 ]:  2.864759683609009
train_batch_loss[ 12150 ]:  3.2004499435424805
train_batch_loss[ 12160 ]:  2.901402473449707
train_batch_loss[ 12170 ]:  2.472896099090576
train_batch_loss[ 12180 ]:  3.7901682

train_batch_loss[ 13730 ]:  2.675703525543213
train_batch_loss[ 13740 ]:  1.4922802448272705
train_batch_loss[ 13750 ]:  2.851869821548462
train_batch_loss[ 13760 ]:  2.139336109161377
train_batch_loss[ 13770 ]:  2.7222890853881836
train_batch_loss[ 13780 ]:  2.3691015243530273
train_batch_loss[ 13790 ]:  1.02528715133667
train_batch_loss[ 13800 ]:  2.731905460357666
train_batch_loss[ 13810 ]:  2.8277621269226074
train_batch_loss[ 13820 ]:  2.4057059288024902
train_batch_loss[ 13830 ]:  3.0249080657958984
train_batch_loss[ 13840 ]:  1.9916614294052124
train_batch_loss[ 13850 ]:  2.639772891998291
train_batch_loss[ 13860 ]:  2.256176233291626
train_batch_loss[ 13870 ]:  1.6652781963348389
train_batch_loss[ 13880 ]:  3.6188085079193115
train_batch_loss[ 13890 ]:  3.23417329788208
train_batch_loss[ 13900 ]:  2.2666800022125244
train_batch_loss[ 13910 ]:  2.199646472930908
train_batch_loss[ 13920 ]:  3.881307601928711
train_batch_loss[ 13930 ]:  2.3508548736572266
train_batch_loss[ 13940 ]

train_batch_loss[ 15490 ]:  2.843254566192627
train_batch_loss[ 15500 ]:  3.8893942832946777
train_batch_loss[ 15510 ]:  2.8315625190734863
train_batch_loss[ 15520 ]:  2.236636161804199
train_batch_loss[ 15530 ]:  2.217818260192871
train_batch_loss[ 15540 ]:  3.315882682800293
train_batch_loss[ 15550 ]:  2.281083345413208
train_batch_loss[ 15560 ]:  3.3806705474853516
train_batch_loss[ 15570 ]:  3.7840487957000732
train_batch_loss[ 15580 ]:  2.5968918800354004
train_batch_loss[ 15590 ]:  2.347884178161621
train_batch_loss[ 15600 ]:  2.8892312049865723
train_batch_loss[ 15610 ]:  4.382162094116211
train_batch_loss[ 15620 ]:  2.628237724304199
train_batch_loss[ 15630 ]:  2.5925705432891846
train_batch_loss[ 15640 ]:  2.6678996086120605
train_batch_loss[ 15650 ]:  2.8747639656066895
train_batch_loss[ 15660 ]:  1.8911197185516357
train_batch_loss[ 15670 ]:  3.69628643989563
train_batch_loss[ 15680 ]:  2.3318517208099365
train_batch_loss[ 15690 ]:  3.236232280731201
train_batch_loss[ 15700 

Evaluate:  12%|████████▍                                                            | 192/1568 [02:25<17:54,  1.28it/s]

Corrupted image for 419552


Evaluate:  19%|█████████████▏                                                       | 301/1568 [03:47<15:24,  1.37it/s]

Corrupted image for 620308


Evaluate:  67%|█████████████████████████████████████████████▍                      | 1047/1568 [13:26<07:59,  1.09it/s]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [20:01<00:00,  1.30it/s]


valid_evaluation: loss=2.840756426217114, acc=0.6491652278338778
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_016000_loss2.840756426217114.pt
train_batch_loss[ 16010 ]:  2.7199819087982178
train_batch_loss[ 16020 ]:  2.6960830688476562
train_batch_loss[ 16030 ]:  2.5568184852600098
train_batch_loss[ 16040 ]:  2.388951063156128
train_batch_loss[ 16050 ]:  2.9268100261688232
train_batch_loss[ 16060 ]:  1.4052647352218628
train_batch_loss[ 16070 ]:  2.711935043334961
train_batch_loss[ 16080 ]:  3.389539957046509
train_batch_loss[ 16090 ]:  1.9301016330718994
train_batch_loss[ 16100 ]:  2.476998805999756
train_batch_loss[ 16110 ]:  2.4269614219665527
train_batch_loss[ 16120 ]:  4.033385276794434
train_batch_loss[ 16130 ]:  2.7900352478027344
train_batch_loss[ 16140 ]:  2.2267520427703857
train_batch_loss[ 16150 ]:  3.9943995475769043
train_batch_loss[ 16160 ]:  2.5597965717315674
train_batch_loss[ 16170 ]:  2.5012142658233643
train_batch_loss[ 16180 ]:  1.8

train_batch_loss[ 17740 ]:  3.1312313079833984
train_batch_loss[ 17750 ]:  3.5912961959838867
train_batch_loss[ 17760 ]:  2.0728888511657715
train_batch_loss[ 17770 ]:  3.173245429992676
train_batch_loss[ 17780 ]:  3.81070613861084
train_batch_loss[ 17790 ]:  2.9780900478363037
train_batch_loss[ 17800 ]:  3.032895088195801
train_batch_loss[ 17810 ]:  2.7991769313812256
train_batch_loss[ 17820 ]:  1.6991368532180786
train_batch_loss[ 17830 ]:  2.899123191833496
train_batch_loss[ 17840 ]:  3.7864949703216553
train_batch_loss[ 17850 ]:  2.478484869003296
train_batch_loss[ 17860 ]:  2.7513651847839355
train_batch_loss[ 17870 ]:  2.6246891021728516
train_batch_loss[ 17880 ]:  2.0285379886627197
train_batch_loss[ 17890 ]:  3.092200994491577
train_batch_loss[ 17900 ]:  3.5360260009765625
train_batch_loss[ 17910 ]:  2.6980037689208984
train_batch_loss[ 17920 ]:  2.1662635803222656
train_batch_loss[ 17930 ]:  4.458296775817871
train_batch_loss[ 17940 ]:  1.9900472164154053
train_batch_loss[ 179

train_batch_loss[ 19510 ]:  0.92497718334198
train_batch_loss[ 19520 ]:  3.9522061347961426
train_batch_loss[ 19530 ]:  2.541750431060791
train_batch_loss[ 19540 ]:  3.3042311668395996
train_batch_loss[ 19550 ]:  2.5829505920410156
train_batch_loss[ 19560 ]:  3.287001132965088
train_batch_loss[ 19570 ]:  2.738680839538574
train_batch_loss[ 19580 ]:  3.603644609451294
train_batch_loss[ 19590 ]:  4.0000810623168945
train_batch_loss[ 19600 ]:  2.2213997840881348
train_batch_loss[ 19610 ]:  3.56644868850708
train_batch_loss[ 19620 ]:  2.3764243125915527
train_batch_loss[ 19630 ]:  3.5474958419799805
train_batch_loss[ 19640 ]:  3.3402109146118164
train_batch_loss[ 19650 ]:  2.957676887512207
train_batch_loss[ 19660 ]:  4.870100021362305
train_batch_loss[ 19670 ]:  3.620656967163086
train_batch_loss[ 19680 ]:  2.741661548614502
train_batch_loss[ 19690 ]:  1.655970573425293
train_batch_loss[ 19700 ]:  1.900451898574829
train_batch_loss[ 19710 ]:  1.715912103652954
train_batch_loss[ 19720 ]:  

Evaluate:  53%|████████████████████████████████████▊                                | 836/1568 [10:35<08:52,  1.38it/s]

Corrupted image for 419552


Evaluate:  91%|█████████████████████████████████████████████████████████████▊      | 1424/1568 [18:02<01:52,  1.28it/s]

Corrupted image for 467544


Evaluate:  94%|████████████████████████████████████████████████████████████████▏   | 1480/1568 [18:45<01:05,  1.34it/s]

Corrupted image for 620308


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [19:51<00:00,  1.32it/s]


valid_evaluation: loss=2.726081011484317, acc=0.6549081015629088
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_020000_loss2.726081011484317.pt
train_batch_loss[ 20010 ]:  1.9452176094055176
train_batch_loss[ 20020 ]:  3.019061803817749
train_batch_loss[ 20030 ]:  2.1578879356384277
train_batch_loss[ 20040 ]:  2.46071195602417
train_batch_loss[ 20050 ]:  1.8679523468017578
train_batch_loss[ 20060 ]:  3.664984703063965
train_batch_loss[ 20070 ]:  2.6909337043762207
train_batch_loss[ 20080 ]:  2.065631628036499
train_batch_loss[ 20090 ]:  2.642404317855835
train_batch_loss[ 20100 ]:  2.0246083736419678
train_batch_loss[ 20110 ]:  3.7004570960998535
train_batch_loss[ 20120 ]:  2.14748215675354
train_batch_loss[ 20130 ]:  2.396419048309326
train_batch_loss[ 20140 ]:  2.3536767959594727
train_batch_loss[ 20150 ]:  3.1732139587402344
train_batch_loss[ 20160 ]:  3.3700921535491943
train_batch_loss[ 20170 ]:  3.0904712677001953
train_batch_loss[ 20180 ]:  2.43453

train_batch_loss[ 21740 ]:  4.745920658111572
train_batch_loss[ 21750 ]:  3.3140368461608887
train_batch_loss[ 21760 ]:  2.8380422592163086
train_batch_loss[ 21770 ]:  2.778181791305542
train_batch_loss[ 21780 ]:  2.8510966300964355
train_batch_loss[ 21790 ]:  2.172051429748535
train_batch_loss[ 21800 ]:  2.4514999389648438
train_batch_loss[ 21810 ]:  2.9373459815979004
train_batch_loss[ 21820 ]:  1.9356063604354858
train_batch_loss[ 21830 ]:  1.7554285526275635
train_batch_loss[ 21840 ]:  2.565042734146118
train_batch_loss[ 21850 ]:  1.916189432144165
train_batch_loss[ 21860 ]:  2.491337299346924
train_batch_loss[ 21870 ]:  3.8357696533203125
train_batch_loss[ 21880 ]:  2.89042329788208
train_batch_loss[ 21890 ]:  3.119723081588745
train_batch_loss[ 21900 ]:  2.344369649887085
train_batch_loss[ 21910 ]:  2.642214775085449
train_batch_loss[ 21920 ]:  2.7763922214508057
train_batch_loss[ 21930 ]:  4.178332328796387
train_batch_loss[ 21940 ]:  3.7095227241516113
train_batch_loss[ 21950 ]

train_batch_loss[ 23500 ]:  3.1048154830932617
train_batch_loss[ 23510 ]:  2.745412826538086
train_batch_loss[ 23520 ]:  3.4219093322753906
train_batch_loss[ 23530 ]:  1.6765873432159424
train_batch_loss[ 23540 ]:  2.3115007877349854
train_batch_loss[ 23550 ]:  1.6161423921585083
train_batch_loss[ 23560 ]:  2.7440993785858154
train_batch_loss[ 23570 ]:  1.9067859649658203
train_batch_loss[ 23580 ]:  3.2827963829040527
train_batch_loss[ 23590 ]:  2.4742488861083984
train_batch_loss[ 23600 ]:  2.3033246994018555
train_batch_loss[ 23610 ]:  2.486053705215454
train_batch_loss[ 23620 ]:  1.7760592699050903
train_batch_loss[ 23630 ]:  3.4985830783843994
train_batch_loss[ 23640 ]:  2.047865390777588
train_batch_loss[ 23650 ]:  1.9063730239868164
train_batch_loss[ 23660 ]:  3.6877856254577637
train_batch_loss[ 23670 ]:  2.1321160793304443
train_batch_loss[ 23680 ]:  1.8580162525177002
train_batch_loss[ 23690 ]:  1.8340630531311035
train_batch_loss[ 23700 ]:  2.137509346008301
train_batch_loss[

Evaluate:   3%|█▊                                                                    | 40/1568 [00:30<18:34,  1.37it/s]

Corrupted image for 620308


Evaluate:  76%|███████████████████████████████████████████████████▍                | 1186/1568 [14:59<04:43,  1.35it/s]

Corrupted image for 467544


Evaluate:  90%|████████████████████████████████████████████████████████████▉       | 1404/1568 [17:44<02:06,  1.30it/s]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [19:47<00:00,  1.32it/s]


valid_evaluation: loss=2.604986959933081, acc=0.6674726621770101
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_024000_loss2.604986959933081.pt
train_batch_loss[ 24010 ]:  2.1850967407226562
train_batch_loss[ 24020 ]:  2.529811382293701
train_batch_loss[ 24030 ]:  2.940633773803711
train_batch_loss[ 24040 ]:  3.1050915718078613
train_batch_loss[ 24050 ]:  1.5240192413330078
train_batch_loss[ 24060 ]:  3.1210694313049316
train_batch_loss[ 24070 ]:  1.8156659603118896
train_batch_loss[ 24080 ]:  3.283167839050293
train_batch_loss[ 24090 ]:  2.7642669677734375
train_batch_loss[ 24100 ]:  2.7510018348693848
train_batch_loss[ 24110 ]:  3.509368896484375
train_batch_loss[ 24120 ]:  2.9747228622436523
train_batch_loss[ 24130 ]:  2.8947644233703613
train_batch_loss[ 24140 ]:  2.1475515365600586
train_batch_loss[ 24150 ]:  3.3210153579711914
train_batch_loss[ 24160 ]:  2.112743377685547
train_batch_loss[ 24170 ]:  2.602715015411377
train_batch_loss[ 24180 ]:  1.31

train_batch_loss[ 25740 ]:  2.992405891418457
train_batch_loss[ 25750 ]:  2.471648693084717
train_batch_loss[ 25760 ]:  3.900155544281006
train_batch_loss[ 25770 ]:  4.54711389541626
train_batch_loss[ 25780 ]:  4.239180564880371
train_batch_loss[ 25790 ]:  2.5579848289489746
train_batch_loss[ 25800 ]:  1.7264628410339355
train_batch_loss[ 25810 ]:  3.2710397243499756
train_batch_loss[ 25820 ]:  2.488562822341919
train_batch_loss[ 25830 ]:  1.3172740936279297
train_batch_loss[ 25840 ]:  2.387136697769165
train_batch_loss[ 25850 ]:  1.913496971130371
train_batch_loss[ 25860 ]:  2.5277247428894043
train_batch_loss[ 25870 ]:  3.2836499214172363
train_batch_loss[ 25880 ]:  5.146964073181152
train_batch_loss[ 25890 ]:  2.813624382019043
train_batch_loss[ 25900 ]:  2.4133152961730957
train_batch_loss[ 25910 ]:  3.3269314765930176
train_batch_loss[ 25920 ]:  1.7717070579528809
train_batch_loss[ 25930 ]:  1.9624296426773071
train_batch_loss[ 25940 ]:  2.3368895053863525
train_batch_loss[ 25950 

train_batch_loss[ 27510 ]:  2.126169443130493
train_batch_loss[ 27520 ]:  2.42368483543396
train_batch_loss[ 27530 ]:  1.9795591831207275
train_batch_loss[ 27540 ]:  1.4346262216567993
train_batch_loss[ 27550 ]:  2.329599142074585
train_batch_loss[ 27560 ]:  2.400817394256592
train_batch_loss[ 27570 ]:  2.5735983848571777
train_batch_loss[ 27580 ]:  1.1568108797073364
Corrupted image for 5347556
train_batch_loss[ 27590 ]:  2.443284511566162
train_batch_loss[ 27600 ]:  3.585796356201172
train_batch_loss[ 27610 ]:  2.610949993133545
train_batch_loss[ 27620 ]:  2.488492012023926
train_batch_loss[ 27630 ]:  2.4565622806549072
train_batch_loss[ 27640 ]:  2.2912089824676514
train_batch_loss[ 27650 ]:  1.9036458730697632
train_batch_loss[ 27660 ]:  3.459951400756836
train_batch_loss[ 27670 ]:  3.43109130859375
train_batch_loss[ 27680 ]:  2.774888515472412
train_batch_loss[ 27690 ]:  1.9422929286956787
Corrupted image for 1619592
train_batch_loss[ 27700 ]:  2.9220190048217773
train_batch_loss[

Evaluate:  44%|██████████████████████████████▏                                      | 687/1568 [08:41<10:51,  1.35it/s]

Corrupted image for 620308


Evaluate:  46%|███████████████████████████████▋                                     | 720/1568 [09:07<11:29,  1.23it/s]

Corrupted image for 419552


Evaluate:  78%|████████████████████████████████████████████████████▉               | 1222/1568 [15:29<04:15,  1.35it/s]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [19:55<00:00,  1.31it/s]


valid_evaluation: loss=2.5704878677066194, acc=0.6739505240839431
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_028000_loss2.5704878677066194.pt
train_batch_loss[ 28010 ]:  1.2127981185913086
train_batch_loss[ 28020 ]:  2.693016529083252
train_batch_loss[ 28030 ]:  3.169511318206787
train_batch_loss[ 28040 ]:  2.784127712249756
train_batch_loss[ 28050 ]:  1.4690556526184082
train_batch_loss[ 28060 ]:  2.0349342823028564
train_batch_loss[ 28070 ]:  3.3360605239868164
train_batch_loss[ 28080 ]:  3.5474181175231934
train_batch_loss[ 28090 ]:  2.7570226192474365
train_batch_loss[ 28100 ]:  2.320298194885254
train_batch_loss[ 28110 ]:  3.3796427249908447
train_batch_loss[ 28120 ]:  2.5332930088043213
train_batch_loss[ 28130 ]:  3.3639869689941406
train_batch_loss[ 28140 ]:  2.5940723419189453
train_batch_loss[ 28150 ]:  1.4744311571121216
train_batch_loss[ 28160 ]:  2.5915775299072266
train_batch_loss[ 28170 ]:  2.101202964782715
train_batch_loss[ 28180 ]:  1

train_batch_loss[ 29740 ]:  2.4689576625823975
train_batch_loss[ 29750 ]:  1.7678923606872559
train_batch_loss[ 29760 ]:  2.296664237976074
train_batch_loss[ 29770 ]:  2.7452492713928223
train_batch_loss[ 29780 ]:  4.586623668670654
train_batch_loss[ 29790 ]:  2.828092098236084
train_batch_loss[ 29800 ]:  1.4729111194610596
train_batch_loss[ 29810 ]:  1.2029780149459839
train_batch_loss[ 29820 ]:  1.8397245407104492
train_batch_loss[ 29830 ]:  3.9571642875671387
train_batch_loss[ 29840 ]:  3.183110475540161
train_batch_loss[ 29850 ]:  2.751477003097534
train_batch_loss[ 29860 ]:  2.872015953063965
train_batch_loss[ 29870 ]:  2.612595558166504
train_batch_loss[ 29880 ]:  3.9367713928222656
train_batch_loss[ 29890 ]:  3.4156689643859863
train_batch_loss[ 29900 ]:  2.524311065673828
train_batch_loss[ 29910 ]:  2.6377861499786377
train_batch_loss[ 29920 ]:  3.1169981956481934
train_batch_loss[ 29930 ]:  2.83587646484375
train_batch_loss[ 29940 ]:  2.0779552459716797
train_batch_loss[ 29950

train_batch_loss[ 31510 ]:  2.703460693359375
train_batch_loss[ 31520 ]:  2.3411784172058105
train_batch_loss[ 31530 ]:  3.1991701126098633
train_batch_loss[ 31540 ]:  3.372413396835327
train_batch_loss[ 31550 ]:  2.2323789596557617
train_batch_loss[ 31560 ]:  2.36026930809021
train_batch_loss[ 31570 ]:  1.428975224494934
train_batch_loss[ 31580 ]:  2.4485151767730713
train_batch_loss[ 31590 ]:  1.695000171661377
train_batch_loss[ 31600 ]:  2.5546834468841553
train_batch_loss[ 31610 ]:  3.5620274543762207
train_batch_loss[ 31620 ]:  1.9700627326965332
train_batch_loss[ 31630 ]:  3.5621466636657715
train_batch_loss[ 31640 ]:  2.856795310974121
train_batch_loss[ 31650 ]:  1.6995203495025635
train_batch_loss[ 31660 ]:  3.188323497772217
train_batch_loss[ 31670 ]:  2.194424629211426
train_batch_loss[ 31680 ]:  2.9655635356903076
train_batch_loss[ 31690 ]:  2.2159132957458496
train_batch_loss[ 31700 ]:  2.4671454429626465
train_batch_loss[ 31710 ]:  2.6939353942871094
train_batch_loss[ 3172

Evaluate:   5%|███▊                                                                  | 84/1568 [01:04<19:24,  1.27it/s]

Corrupted image for 620308


Evaluate:  34%|███████████████████████▍                                             | 532/1568 [06:42<13:13,  1.30it/s]

Corrupted image for 467544


Evaluate:  45%|███████████████████████████████                                      | 706/1568 [08:57<10:38,  1.35it/s]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [19:54<00:00,  1.31it/s]


valid_evaluation: loss=2.568006475124071, acc=0.6726474772465101
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_032000_loss2.568006475124071.pt
train_batch_loss[ 32010 ]:  2.5638279914855957
train_batch_loss[ 32020 ]:  1.8428720235824585
train_batch_loss[ 32030 ]:  2.246119499206543
train_batch_loss[ 32040 ]:  4.500577926635742
train_batch_loss[ 32050 ]:  2.9056434631347656
train_batch_loss[ 32060 ]:  2.5735671520233154
train_batch_loss[ 32070 ]:  2.4809231758117676
train_batch_loss[ 32080 ]:  2.2389845848083496
train_batch_loss[ 32090 ]:  1.1798793077468872
train_batch_loss[ 32100 ]:  2.282726287841797
train_batch_loss[ 32110 ]:  2.940530776977539
train_batch_loss[ 32120 ]:  2.7009243965148926
train_batch_loss[ 32130 ]:  2.628108501434326
train_batch_loss[ 32140 ]:  3.213829278945923
train_batch_loss[ 32150 ]:  1.8637754917144775
train_batch_loss[ 32160 ]:  2.865316867828369
train_batch_loss[ 32170 ]:  2.495148181915283
train_batch_loss[ 32180 ]:  1.9511

train_batch_loss[ 33740 ]:  2.31701922416687
train_batch_loss[ 33750 ]:  1.0991578102111816
train_batch_loss[ 33760 ]:  2.1403565406799316
train_batch_loss[ 33770 ]:  3.6137890815734863
train_batch_loss[ 33780 ]:  4.031314373016357
train_batch_loss[ 33790 ]:  3.3452401161193848
train_batch_loss[ 33800 ]:  2.168928384780884
train_batch_loss[ 33810 ]:  2.2519025802612305
train_batch_loss[ 33820 ]:  3.675935745239258
train_batch_loss[ 33830 ]:  2.153416872024536
train_batch_loss[ 33840 ]:  1.9516220092773438
train_batch_loss[ 33850 ]:  1.8790249824523926
train_batch_loss[ 33860 ]:  2.426741600036621
train_batch_loss[ 33870 ]:  3.2713470458984375
train_batch_loss[ 33880 ]:  2.196927547454834
train_batch_loss[ 33890 ]:  3.11981201171875
train_batch_loss[ 33900 ]:  1.8136166334152222
train_batch_loss[ 33910 ]:  2.7697272300720215
train_batch_loss[ 33920 ]:  2.7774057388305664
train_batch_loss[ 33930 ]:  3.053694486618042
train_batch_loss[ 33940 ]:  1.926832675933838
train_batch_loss[ 33950 ]

train_batch_loss[ 35510 ]:  2.776036024093628
train_batch_loss[ 35520 ]:  3.564734697341919
train_batch_loss[ 35530 ]:  2.0254247188568115
train_batch_loss[ 35540 ]:  3.187138557434082
train_batch_loss[ 35550 ]:  1.590101718902588
train_batch_loss[ 35560 ]:  2.445668935775757
train_batch_loss[ 35570 ]:  2.4911000728607178
train_batch_loss[ 35580 ]:  1.6064786911010742
train_batch_loss[ 35590 ]:  2.1085524559020996
train_batch_loss[ 35600 ]:  2.4806060791015625
train_batch_loss[ 35610 ]:  3.0340256690979004
train_batch_loss[ 35620 ]:  3.5618112087249756
train_batch_loss[ 35630 ]:  2.4551334381103516
train_batch_loss[ 35640 ]:  2.5013949871063232
train_batch_loss[ 35650 ]:  1.7825660705566406
train_batch_loss[ 35660 ]:  2.743065357208252
train_batch_loss[ 35670 ]:  3.306652545928955
train_batch_loss[ 35680 ]:  1.9334659576416016
train_batch_loss[ 35690 ]:  4.230473518371582
train_batch_loss[ 35700 ]:  3.4755125045776367
train_batch_loss[ 35710 ]:  1.4321849346160889
train_batch_loss[ 357

Evaluate:  24%|████████████████▋                                                    | 379/1568 [04:49<15:42,  1.26it/s]

Corrupted image for 620308


Evaluate:  40%|███████████████████████████▍                                         | 624/1568 [07:54<12:06,  1.30it/s]

Corrupted image for 467544


Evaluate:  96%|█████████████████████████████████████████████████████████████████   | 1501/1568 [19:02<00:52,  1.28it/s]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [19:53<00:00,  1.31it/s]


valid_evaluation: loss=2.4916434988334593, acc=0.6819033453173778
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_036000_loss2.4916434988334593.pt
train_batch_loss[ 36010 ]:  1.8255667686462402
train_batch_loss[ 36020 ]:  2.9596099853515625
train_batch_loss[ 36030 ]:  3.6805403232574463
train_batch_loss[ 36040 ]:  2.6622707843780518
train_batch_loss[ 36050 ]:  3.8188915252685547
train_batch_loss[ 36060 ]:  2.999582529067993
train_batch_loss[ 36070 ]:  3.2273738384246826
train_batch_loss[ 36080 ]:  3.2643251419067383
train_batch_loss[ 36090 ]:  2.3757333755493164
train_batch_loss[ 36100 ]:  2.107017993927002
train_batch_loss[ 36110 ]:  3.034560441970825
train_batch_loss[ 36120 ]:  2.6915059089660645
train_batch_loss[ 36130 ]:  2.10434889793396
train_batch_loss[ 36140 ]:  1.6812806129455566
train_batch_loss[ 36150 ]:  2.3932721614837646
train_batch_loss[ 36160 ]:  2.2754127979278564
train_batch_loss[ 36170 ]:  1.3736217021942139
train_batch_loss[ 36180 ]:  2

train_batch_loss[ 37740 ]:  1.5126953125
train_batch_loss[ 37750 ]:  2.474444627761841
train_batch_loss[ 37760 ]:  1.574772596359253
train_batch_loss[ 37770 ]:  2.925699234008789
train_batch_loss[ 37780 ]:  3.44254207611084
train_batch_loss[ 37790 ]:  2.000445604324341
train_batch_loss[ 37800 ]:  2.847311019897461
train_batch_loss[ 37810 ]:  2.1543784141540527
train_batch_loss[ 37820 ]:  1.715977668762207
train_batch_loss[ 37830 ]:  2.3335046768188477
train_batch_loss[ 37840 ]:  2.8229408264160156
train_batch_loss[ 37850 ]:  2.4335622787475586
train_batch_loss[ 37860 ]:  2.3965601921081543
train_batch_loss[ 37870 ]:  1.9859386682510376
train_batch_loss[ 37880 ]:  3.3096423149108887
train_batch_loss[ 37890 ]:  2.1765544414520264
train_batch_loss[ 37900 ]:  2.667968273162842
train_batch_loss[ 37910 ]:  3.139277935028076
train_batch_loss[ 37920 ]:  1.5541976690292358
train_batch_loss[ 37930 ]:  1.56730318069458
train_batch_loss[ 37940 ]:  2.5468368530273438
train_batch_loss[ 37950 ]:  3.0

train_batch_loss[ 39500 ]:  1.8168895244598389
train_batch_loss[ 39510 ]:  2.89300274848938
train_batch_loss[ 39520 ]:  3.258483409881592
train_batch_loss[ 39530 ]:  0.8561418652534485
train_batch_loss[ 39540 ]:  2.3412113189697266
train_batch_loss[ 39550 ]:  1.9895020723342896
train_batch_loss[ 39560 ]:  2.9205493927001953
train_batch_loss[ 39570 ]:  1.6345503330230713
train_batch_loss[ 39580 ]:  3.309140682220459
train_batch_loss[ 39590 ]:  3.3880088329315186
train_batch_loss[ 39600 ]:  2.224439859390259
train_batch_loss[ 39610 ]:  3.2059433460235596
train_batch_loss[ 39620 ]:  2.3765430450439453
train_batch_loss[ 39630 ]:  2.824875831604004
train_batch_loss[ 39640 ]:  2.0805413722991943
train_batch_loss[ 39650 ]:  1.7940776348114014
train_batch_loss[ 39660 ]:  3.6219944953918457
train_batch_loss[ 39670 ]:  2.6655189990997314
train_batch_loss[ 39680 ]:  2.5373735427856445
train_batch_loss[ 39690 ]:  3.33853816986084
train_batch_loss[ 39700 ]:  3.6362428665161133
train_batch_loss[ 397

Evaluate:   3%|██▎                                                                   | 53/1568 [00:36<17:52,  1.41it/s]

Corrupted image for 620308


Evaluate:  50%|██████████████████████████████████▋                                  | 788/1568 [09:44<09:26,  1.38it/s]

Corrupted image for 467544


Evaluate:  90%|█████████████████████████████████████████████████████████████▍      | 1417/1568 [17:40<01:48,  1.39it/s]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [19:57<00:00,  1.31it/s]

valid_evaluation: loss=2.5035567038361, acc=0.6781125503591476
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_040000_loss2.5035567038361.pt
train_loss:  3.421708146211505





In [12]:

import os
import pickle

 
with open('D:/lab2/深度学习课程设计/crnn-pytorch-master/train_history/mobileVit_rnn_loss', 'wb') as fp:
    pickle.dump([loss_history,valid_history], fp)

MemoryError: 

In [None]:
with open ('D:/lab2/深度学习课程设计/crnn-pytorch-master/train_history/mobileVit_rnn_1', 'rb') as fp:
    temp = pickle.load(fp)
temp

In [None]:
pytorch_total_params = sum(p.numel() for p in crnn.parameters())
pytorch_total_params

In [13]:
loss_history

[24.15996551513672,
 23.147846221923828,
 25.26816177368164,
 23.757020950317383,
 22.980716705322266,
 24.600717544555664,
 23.73973274230957,
 23.625492095947266,
 25.49618911743164,
 23.96426010131836,
 23.72730255126953,
 23.053531646728516,
 24.715412139892578,
 24.980304718017578,
 24.164417266845703,
 24.762134552001953,
 22.56833839416504,
 23.74498748779297,
 23.180675506591797,
 23.6967830657959,
 22.54412841796875,
 23.37295150756836,
 21.49481773376465,
 23.705638885498047,
 20.418201446533203,
 23.384031295776367,
 21.797086715698242,
 22.812301635742188,
 20.524314880371094,
 21.212459564208984,
 23.029254913330078,
 21.234966278076172,
 21.494609832763672,
 21.280935287475586,
 22.542755126953125,
 21.828720092773438,
 21.478775024414062,
 19.28566551208496,
 20.738784790039062,
 20.330886840820312,
 20.08675765991211,
 17.75045394897461,
 18.13392448425293,
 18.938695907592773,
 20.794984817504883,
 18.07672119140625,
 18.164703369140625,
 18.431640625,
 15.343786239624

# CRNN with MobileViT blocks

In [27]:
class CRNN_vit(nn.Module):

    def __init__(self, img_channel, img_height, img_width, num_class,
                 map_to_seq_hidden=64, rnn_hidden=256, leaky_relu=False):
        super(CRNN_vit, self).__init__()

        self.cnn, (output_channel, output_height, output_width) = \
            self._cnn_backbone(img_channel, img_height, img_width, leaky_relu)

        self.map_to_seq = nn.Linear(output_channel * output_height, map_to_seq_hidden)

        self.rnn1 = nn.LSTM(map_to_seq_hidden, rnn_hidden, bidirectional=True)
        self.rnn2 = nn.LSTM(2 * rnn_hidden, rnn_hidden, bidirectional=True)

        self.dense = nn.Linear(2 * rnn_hidden, num_class)

    def _cnn_backbone(self, img_channel, img_height, img_width, leaky_relu):
        assert img_height % 16 == 0
        assert img_width % 4 == 0

        channels = [img_channel, 64, 128, 256, 256, 512, 512, 512]
        kernel_sizes = [3, 3, 3, 3, 3, 3, 2]
        strides = [1, 1, 1, 1, 1, 1, 1]
        paddings = [1, 1, 1, 1, 1, 1, 0]
        L = [2, 4, 3]
        dims = [64, 80, 96]
        patch_size=(2,2)
        cnn = nn.Sequential()
        
        

        def conv_relu(i, batch_norm=False):
            # shape of input: (batch, input_channel, height, width)
            input_channel = channels[i]
            output_channel = channels[i+1]

            cnn.add_module(
                f'conv{i}',
                nn.Conv2d(input_channel, output_channel, kernel_sizes[i], strides[i], paddings[i])
            )

            if batch_norm:
                cnn.add_module(f'batchnorm{i}', nn.BatchNorm2d(output_channel))

            relu = nn.LeakyReLU(0.2, inplace=True) if leaky_relu else nn.ReLU(inplace=True)
            cnn.add_module(f'relu{i}', relu)

        # size of image: (channel, height, width) = (img_channel, img_height, img_width)
        conv_relu(0)
        cnn.add_module('pooling0', nn.MaxPool2d(kernel_size=2, stride=2))
        # (64, img_height // 2, img_width // 2)
        
        # add vit block
        cnn.add_module('MobViT1', MobileViTBlock(dims[0], L[0], channels[1], kernel_sizes[1], patch_size, int(dims[0]*2)))
        conv_relu(1)
        cnn.add_module('pooling1', nn.MaxPool2d(kernel_size=2, stride=2))
        # (128, img_height // 4, img_width // 4)
        
        # add vit block
        cnn.add_module('MobViT1', MobileViTBlock(dims[1], L[1], channels[1], kernel_sizes[2], patch_size, int(dims[1]*2)))
        conv_relu(2)
        conv_relu(3)
        cnn.add_module(
            'pooling2',
            nn.MaxPool2d(kernel_size=(2, 1))
        )  # (256, img_height // 8, img_width // 4)

        conv_relu(4, batch_norm=True)
        conv_relu(5, batch_norm=True)
        cnn.add_module(
            'pooling3',
            nn.MaxPool2d(kernel_size=(2, 1))
        )  # (512, img_height // 16, img_width // 4)

        conv_relu(6)  # (512, img_height // 16 - 1, img_width // 4 - 1)

        output_channel, output_height, output_width = \
            channels[-1], img_height // 16 - 1, img_width // 4 - 1
        return cnn, (output_channel, output_height, output_width)

    def forward(self, images):
        # shape of images: (batch, channel, height, width)

        conv = self.cnn(images)
        batch, channel, height, width = conv.size()
        
        conv = conv.view(batch, channel * height, width)
        conv = conv.permute(2, 0, 1)  # (width, batch, feature)
        #print(conv.size())
        seq = self.map_to_seq(conv)

        recurrent, _ = self.rnn1(seq)
        recurrent, _ = self.rnn2(recurrent)

        output = self.dense(recurrent)
        return output  # shape: (seq_len, batch, num_class)

In [28]:
num_class = len(Synth90kDataset.LABEL2CHAR) + 1
print(num_class)
model = CRNN_vit(1, 32, 100, num_class,
            map_to_seq_hidden=config['map_to_seq_hidden'],
            rnn_hidden=config['rnn_hidden'],
            leaky_relu=config['leaky_relu'])
#model.load_state_dict(torch.load('D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobileVit_chars74k_2.pth'),strict=False)
dummy_input = torch.randn(5, 1, 32, 100)
dummy_output =model(dummy_input)
dummy_output.size()

37


torch.Size([24, 5, 37])

In [29]:
pytorch_total_params = sum(p.numel() for p in model.parameters())
pytorch_total_params

8106373

In [31]:

epochs = config['epochs']
train_batch_size = config['train_batch_size']
eval_batch_size = config['eval_batch_size']
lr = config['lr']
show_interval = config['show_interval']
valid_interval = config['valid_interval']
save_interval = config['save_interval']
end_interval = 40000
cpu_workers = config['cpu_workers']
reload_checkpoint = config['reload_checkpoint']
valid_max_iter = config['valid_max_iter']

img_width = 128
img_height = 32
img_channel = config['img_channel']
data_dir = config['data_dir']
#print(img_width,img_height)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

train_dataset = Synth90kDataset(root_dir=data_dir, mode='train',
                                img_height=img_height, img_width=img_width)
valid_dataset = Synth90kDataset(root_dir=data_dir, mode='dev',
                                img_height=img_height, img_width=img_width)

print(valid_dataset.__len__())
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=eval_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)

num_class = len(Synth90kDataset.LABEL2CHAR) + 1
'''
crnn = CRNN(1, img_height, img_width, num_class,
            map_to_seq_hidden=config['map_to_seq_hidden'],
            rnn_hidden=config['rnn_hidden'],
            leaky_relu=config['leaky_relu'])
if reload_checkpoint:
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
'''  
crnn = CRNN_vit(1, 32, 100, num_class,
            map_to_seq_hidden=config['map_to_seq_hidden'],
            rnn_hidden=config['rnn_hidden'],
            leaky_relu=config['leaky_relu'])

pytorch_total_params = sum(p.numel() for p in crnn.parameters())
print('total param: ', pytorch_total_params)


device: cuda
802734
total param:  8106373


In [33]:

crnn.to(device)

optimizer = optim.Adam(crnn.parameters())
criterion = CTCLoss(reduction='sum', zero_infinity=True)
criterion.to(device)
loss_history=[]
valid_history=[]
assert save_interval % valid_interval == 0
i = 1
for epoch in range(1, epochs + 1):
    print(f'epoch: {epoch}')
    tot_train_loss = 0.
    tot_train_count = 0
    for train_data in train_loader:
        loss = train_batch(crnn, train_data, optimizer, criterion, device)
        train_size = train_data[0].size(0)

        tot_train_loss += loss
        tot_train_count += train_size
        if i % show_interval == 0:
            print('train_batch_loss[', i, ']: ', loss / train_size)
            loss_history.append(loss / train_size)

        if i % valid_interval == 0:
            evaluation = evaluate(crnn, valid_loader, criterion,
                                  decode_method=config['decode_method'],
                                  beam_size=config['beam_size'])
            print('valid_evaluation: loss={loss}, acc={acc}'.format(**evaluation))
            valid_history.append(evaluation)
            if i % save_interval == 0:
                prefix = 'mobilevit_rnn_v4'
                loss = evaluation['loss']
                save_model_path = os.path.join(config['checkpoints_dir'],
                                               f'{prefix}_{i:06}_loss{loss}.pt')
                torch.save(crnn.state_dict(), save_model_path)
                print('save model at ', save_model_path)

        if i % end_interval == 0:
            break
        i += 1

    print('train_loss: ', tot_train_loss / tot_train_count)

epoch: 1
train_batch_loss[ 10 ]:  26.32318115234375
train_batch_loss[ 20 ]:  25.92038917541504
train_batch_loss[ 30 ]:  25.77983856201172
train_batch_loss[ 40 ]:  26.687671661376953
train_batch_loss[ 50 ]:  24.70000457763672
train_batch_loss[ 60 ]:  25.67837142944336
train_batch_loss[ 70 ]:  25.52066421508789
train_batch_loss[ 80 ]:  25.48587989807129
train_batch_loss[ 90 ]:  25.60915756225586
train_batch_loss[ 100 ]:  27.051166534423828
train_batch_loss[ 110 ]:  24.26970863342285
train_batch_loss[ 120 ]:  23.374496459960938
train_batch_loss[ 130 ]:  26.735599517822266
train_batch_loss[ 140 ]:  24.747140884399414
train_batch_loss[ 150 ]:  23.061668395996094
train_batch_loss[ 160 ]:  24.722734451293945
train_batch_loss[ 170 ]:  23.789003372192383
train_batch_loss[ 180 ]:  24.68138313293457
train_batch_loss[ 190 ]:  22.923885345458984
train_batch_loss[ 200 ]:  25.117904663085938
train_batch_loss[ 210 ]:  25.955337524414062
train_batch_loss[ 220 ]:  23.483539581298828
train_batch_loss[ 23

train_batch_loss[ 1830 ]:  11.268327713012695
train_batch_loss[ 1840 ]:  10.457213401794434
train_batch_loss[ 1850 ]:  12.321795463562012
train_batch_loss[ 1860 ]:  9.384542465209961
train_batch_loss[ 1870 ]:  6.521623134613037
train_batch_loss[ 1880 ]:  8.652471542358398
train_batch_loss[ 1890 ]:  8.335664749145508
train_batch_loss[ 1900 ]:  8.399188041687012
train_batch_loss[ 1910 ]:  8.04068374633789
train_batch_loss[ 1920 ]:  7.094365119934082
Corrupted image for 4436875
train_batch_loss[ 1930 ]:  7.1413397789001465
train_batch_loss[ 1940 ]:  8.44589614868164
train_batch_loss[ 1950 ]:  9.468132019042969
train_batch_loss[ 1960 ]:  8.121112823486328
train_batch_loss[ 1970 ]:  7.2706122398376465
train_batch_loss[ 1980 ]:  7.10850715637207
train_batch_loss[ 1990 ]:  10.292952537536621
train_batch_loss[ 2000 ]:  6.333601951599121
train_batch_loss[ 2010 ]:  7.355423450469971
train_batch_loss[ 2020 ]:  7.241215229034424
train_batch_loss[ 2030 ]:  8.5230131149292
train_batch_loss[ 2040 ]: 

train_batch_loss[ 3630 ]:  2.8169517517089844
train_batch_loss[ 3640 ]:  3.662916660308838
train_batch_loss[ 3650 ]:  2.7854199409484863
train_batch_loss[ 3660 ]:  2.286363363265991
train_batch_loss[ 3670 ]:  2.63492488861084
train_batch_loss[ 3680 ]:  4.022756099700928
train_batch_loss[ 3690 ]:  2.150639533996582
train_batch_loss[ 3700 ]:  2.204545259475708
train_batch_loss[ 3710 ]:  3.2525794506073
train_batch_loss[ 3720 ]:  2.9519951343536377
train_batch_loss[ 3730 ]:  3.031076192855835
train_batch_loss[ 3740 ]:  2.363663673400879
train_batch_loss[ 3750 ]:  3.9293055534362793
train_batch_loss[ 3760 ]:  3.7389748096466064
train_batch_loss[ 3770 ]:  2.056551456451416
train_batch_loss[ 3780 ]:  3.18585205078125
train_batch_loss[ 3790 ]:  2.531254529953003
train_batch_loss[ 3800 ]:  3.5934948921203613
train_batch_loss[ 3810 ]:  3.289252758026123
train_batch_loss[ 3820 ]:  2.209596872329712
train_batch_loss[ 3830 ]:  3.4715890884399414
train_batch_loss[ 3840 ]:  3.1296064853668213
train_

Evaluate:   1%|▎                                                                      | 8/1568 [00:11<37:35,  1.45s/it]

Corrupted image for 467544


Evaluate:  45%|███████████████████████████████▏                                     | 709/1568 [16:48<20:12,  1.41s/it]

Corrupted image for 419552


Evaluate:  64%|███████████████████████████████████████████▌                        | 1005/1568 [23:48<13:15,  1.41s/it]

Corrupted image for 620308


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [37:10<00:00,  1.42s/it]


valid_evaluation: loss=2.5910161001522902, acc=0.7123492962799632
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v4_004000_loss2.5910161001522902.pt
train_batch_loss[ 4010 ]:  2.426466703414917
train_batch_loss[ 4020 ]:  3.6204166412353516
train_batch_loss[ 4030 ]:  2.31118106842041
train_batch_loss[ 4040 ]:  4.372664451599121
train_batch_loss[ 4050 ]:  1.9227244853973389
train_batch_loss[ 4060 ]:  3.2224416732788086
train_batch_loss[ 4070 ]:  4.386934280395508
train_batch_loss[ 4080 ]:  2.968515396118164
train_batch_loss[ 4090 ]:  2.5307958126068115
train_batch_loss[ 4100 ]:  1.7993866205215454
train_batch_loss[ 4110 ]:  1.7593986988067627
train_batch_loss[ 4120 ]:  2.17680287361145
train_batch_loss[ 4130 ]:  3.877945899963379
train_batch_loss[ 4140 ]:  2.419964551925659
train_batch_loss[ 4150 ]:  1.4512479305267334
train_batch_loss[ 4160 ]:  1.5821154117584229
train_batch_loss[ 4170 ]:  1.4139971733093262
train_batch_loss[ 4180 ]:  3.0875329971313477
tr

train_batch_loss[ 5770 ]:  1.7956368923187256
train_batch_loss[ 5780 ]:  1.7829428911209106
train_batch_loss[ 5790 ]:  2.663888931274414
train_batch_loss[ 5800 ]:  2.6835131645202637
train_batch_loss[ 5810 ]:  2.6817588806152344
train_batch_loss[ 5820 ]:  1.722990870475769
train_batch_loss[ 5830 ]:  2.2394676208496094
train_batch_loss[ 5840 ]:  1.5378696918487549
train_batch_loss[ 5850 ]:  2.885404586791992
train_batch_loss[ 5860 ]:  1.8839598894119263
train_batch_loss[ 5870 ]:  1.6892343759536743
train_batch_loss[ 5880 ]:  3.52044677734375
train_batch_loss[ 5890 ]:  2.92002272605896
train_batch_loss[ 5900 ]:  1.881901502609253
train_batch_loss[ 5910 ]:  2.578514575958252
train_batch_loss[ 5920 ]:  1.732380986213684
train_batch_loss[ 5930 ]:  2.821031093597412
train_batch_loss[ 5940 ]:  3.052427291870117
train_batch_loss[ 5950 ]:  2.9459924697875977
train_batch_loss[ 5960 ]:  2.1055939197540283
train_batch_loss[ 5970 ]:  1.8124606609344482
train_batch_loss[ 5980 ]:  1.1530039310455322


train_batch_loss[ 7570 ]:  2.9470577239990234
train_batch_loss[ 7580 ]:  0.8497297763824463
train_batch_loss[ 7590 ]:  2.9133386611938477
train_batch_loss[ 7600 ]:  2.042278528213501
train_batch_loss[ 7610 ]:  1.2354214191436768
train_batch_loss[ 7620 ]:  0.5121327042579651
train_batch_loss[ 7630 ]:  2.00776743888855
train_batch_loss[ 7640 ]:  1.0664833784103394
train_batch_loss[ 7650 ]:  2.439478874206543
train_batch_loss[ 7660 ]:  1.211403727531433
train_batch_loss[ 7670 ]:  1.444502830505371
train_batch_loss[ 7680 ]:  2.414537191390991
train_batch_loss[ 7690 ]:  1.7984569072723389
train_batch_loss[ 7700 ]:  1.0869234800338745
train_batch_loss[ 7710 ]:  1.7626821994781494
train_batch_loss[ 7720 ]:  0.9595363140106201
train_batch_loss[ 7730 ]:  1.7704716920852661
train_batch_loss[ 7740 ]:  1.4982683658599854
train_batch_loss[ 7750 ]:  1.4499356746673584
train_batch_loss[ 7760 ]:  1.2026305198669434
train_batch_loss[ 7770 ]:  1.9573192596435547
train_batch_loss[ 7780 ]:  2.419201850891

Evaluate:   0%|▎                                                                      | 6/1568 [00:08<36:13,  1.39s/it]

Corrupted image for 467544


Evaluate:  79%|█████████████████████████████████████████████████████▌              | 1236/1568 [29:35<07:49,  1.42s/it]

Corrupted image for 419552


Evaluate:  88%|███████████████████████████████████████████████████████████▊        | 1379/1568 [33:00<04:25,  1.41s/it]

Corrupted image for 620308


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [37:32<00:00,  1.44s/it]


valid_evaluation: loss=1.8641845750114072, acc=0.7844603567308722
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v4_008000_loss1.8641845750114072.pt
train_batch_loss[ 8010 ]:  1.1862125396728516
train_batch_loss[ 8020 ]:  1.9219000339508057
train_batch_loss[ 8030 ]:  0.7429559230804443
train_batch_loss[ 8040 ]:  1.4282174110412598
train_batch_loss[ 8050 ]:  2.44954514503479
train_batch_loss[ 8060 ]:  2.1809768676757812
train_batch_loss[ 8070 ]:  2.0010876655578613
train_batch_loss[ 8080 ]:  0.9646235704421997
train_batch_loss[ 8090 ]:  0.34759217500686646
train_batch_loss[ 8100 ]:  1.202235460281372
train_batch_loss[ 8110 ]:  1.8848203420639038
train_batch_loss[ 8120 ]:  2.1511449813842773
train_batch_loss[ 8130 ]:  1.8935967683792114
train_batch_loss[ 8140 ]:  1.7064440250396729
train_batch_loss[ 8150 ]:  1.8224109411239624
train_batch_loss[ 8160 ]:  1.653502106666565
train_batch_loss[ 8170 ]:  1.5827240943908691
train_batch_loss[ 8180 ]:  2.120045661926

train_batch_loss[ 9770 ]:  1.9508543014526367
train_batch_loss[ 9780 ]:  0.9186092615127563
train_batch_loss[ 9790 ]:  1.090134620666504
train_batch_loss[ 9800 ]:  0.6523818373680115
train_batch_loss[ 9810 ]:  1.8868460655212402
train_batch_loss[ 9820 ]:  1.0311508178710938
train_batch_loss[ 9830 ]:  1.0425450801849365
train_batch_loss[ 9840 ]:  1.669570803642273
train_batch_loss[ 9850 ]:  0.8282973766326904
train_batch_loss[ 9860 ]:  1.6051082611083984
train_batch_loss[ 9870 ]:  3.509739398956299
train_batch_loss[ 9880 ]:  2.029191017150879
train_batch_loss[ 9890 ]:  1.837756633758545
train_batch_loss[ 9900 ]:  1.6050660610198975
train_batch_loss[ 9910 ]:  1.0356165170669556
train_batch_loss[ 9920 ]:  1.459132432937622
train_batch_loss[ 9930 ]:  2.490138530731201
train_batch_loss[ 9940 ]:  1.433144450187683
train_batch_loss[ 9950 ]:  1.9271469116210938
train_batch_loss[ 9960 ]:  1.153168797492981
train_batch_loss[ 9970 ]:  0.9572972059249878
train_batch_loss[ 9980 ]:  1.89019703865051

train_batch_loss[ 11530 ]:  1.7310800552368164
train_batch_loss[ 11540 ]:  1.8591721057891846
train_batch_loss[ 11550 ]:  1.5932904481887817
train_batch_loss[ 11560 ]:  1.7765257358551025
train_batch_loss[ 11570 ]:  1.8646328449249268
train_batch_loss[ 11580 ]:  2.97763729095459
train_batch_loss[ 11590 ]:  1.8220734596252441
train_batch_loss[ 11600 ]:  1.1716487407684326
train_batch_loss[ 11610 ]:  1.2768770456314087
train_batch_loss[ 11620 ]:  1.331760287284851
train_batch_loss[ 11630 ]:  1.6578137874603271
train_batch_loss[ 11640 ]:  2.7281136512756348
train_batch_loss[ 11650 ]:  0.9357215166091919
train_batch_loss[ 11660 ]:  2.3460941314697266
train_batch_loss[ 11670 ]:  1.1540029048919678
train_batch_loss[ 11680 ]:  1.1226720809936523
train_batch_loss[ 11690 ]:  1.137709379196167
train_batch_loss[ 11700 ]:  1.2252631187438965
train_batch_loss[ 11710 ]:  1.0961520671844482
train_batch_loss[ 11720 ]:  1.8313096761703491
train_batch_loss[ 11730 ]:  1.700236439704895
train_batch_loss[ 

Evaluate:  38%|██████████████████████████▎                                          | 599/1568 [14:15<23:31,  1.46s/it]

Corrupted image for 620308


Evaluate:  86%|██████████████████████████████████████████████████████████▎         | 1344/1568 [32:10<05:12,  1.40s/it]

Corrupted image for 419552


Evaluate:  93%|███████████████████████████████████████████████████████████████▍    | 1462/1568 [34:58<02:33,  1.45s/it]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [37:29<00:00,  1.43s/it]


valid_evaluation: loss=1.4699078681275808, acc=0.8244325019246724
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v4_012000_loss1.4699078681275808.pt
train_batch_loss[ 12010 ]:  0.6805469393730164
train_batch_loss[ 12020 ]:  1.7339507341384888
train_batch_loss[ 12030 ]:  1.6245990991592407
train_batch_loss[ 12040 ]:  1.0680291652679443
train_batch_loss[ 12050 ]:  1.3058924674987793
train_batch_loss[ 12060 ]:  1.0454955101013184
train_batch_loss[ 12070 ]:  0.51509028673172
train_batch_loss[ 12080 ]:  2.5891823768615723
train_batch_loss[ 12090 ]:  2.129519462585449
train_batch_loss[ 12100 ]:  0.9962986707687378
train_batch_loss[ 12110 ]:  0.657320499420166
train_batch_loss[ 12120 ]:  1.965468168258667
train_batch_loss[ 12130 ]:  1.4937013387680054
train_batch_loss[ 12140 ]:  1.4084279537200928
train_batch_loss[ 12150 ]:  1.1554030179977417
train_batch_loss[ 12160 ]:  1.3371338844299316
train_batch_loss[ 12170 ]:  1.319129228591919
train_batch_loss[ 12180 ]: 

train_batch_loss[ 13730 ]:  1.5096819400787354
train_batch_loss[ 13740 ]:  2.332642078399658
train_batch_loss[ 13750 ]:  2.094013214111328
train_batch_loss[ 13760 ]:  2.6536731719970703
train_batch_loss[ 13770 ]:  2.211915969848633
train_batch_loss[ 13780 ]:  2.6942338943481445
train_batch_loss[ 13790 ]:  1.9785534143447876
train_batch_loss[ 13800 ]:  1.2898119688034058
train_batch_loss[ 13810 ]:  3.5751256942749023
train_batch_loss[ 13820 ]:  2.092632293701172
train_batch_loss[ 13830 ]:  0.5683531761169434
train_batch_loss[ 13840 ]:  1.9962780475616455
train_batch_loss[ 13850 ]:  1.7485177516937256
train_batch_loss[ 13860 ]:  0.8922043442726135
train_batch_loss[ 13870 ]:  1.2186479568481445
train_batch_loss[ 13880 ]:  1.320364236831665
train_batch_loss[ 13890 ]:  0.9273020029067993
train_batch_loss[ 13900 ]:  1.0284342765808105
train_batch_loss[ 13910 ]:  0.6733769178390503
train_batch_loss[ 13920 ]:  1.6760742664337158
train_batch_loss[ 13930 ]:  1.325414776802063
train_batch_loss[ 1

train_batch_loss[ 15490 ]:  1.278699278831482
train_batch_loss[ 15500 ]:  0.8686249852180481
train_batch_loss[ 15510 ]:  1.5333261489868164
train_batch_loss[ 15520 ]:  1.4445691108703613
train_batch_loss[ 15530 ]:  0.8686215877532959
train_batch_loss[ 15540 ]:  1.3180277347564697
train_batch_loss[ 15550 ]:  1.2999478578567505
train_batch_loss[ 15560 ]:  1.447455883026123
train_batch_loss[ 15570 ]:  1.2375706434249878
train_batch_loss[ 15580 ]:  0.9985144734382629
train_batch_loss[ 15590 ]:  0.8281264305114746
train_batch_loss[ 15600 ]:  1.6658416986465454
train_batch_loss[ 15610 ]:  1.3043135404586792
train_batch_loss[ 15620 ]:  2.8064322471618652
train_batch_loss[ 15630 ]:  0.866727352142334
train_batch_loss[ 15640 ]:  0.7851890325546265
train_batch_loss[ 15650 ]:  0.9641305208206177
train_batch_loss[ 15660 ]:  1.0615772008895874
train_batch_loss[ 15670 ]:  2.996303081512451
train_batch_loss[ 15680 ]:  1.091420292854309
train_batch_loss[ 15690 ]:  1.0758380889892578
train_batch_loss[ 

Evaluate:  23%|███████████████▉                                                     | 362/1568 [08:17<27:43,  1.38s/it]

Corrupted image for 419552


Evaluate:  38%|█████████████████████████▉                                           | 589/1568 [13:36<22:48,  1.40s/it]

Corrupted image for 467544


Evaluate:  45%|██████████████████████████████▋                                      | 698/1568 [16:12<20:49,  1.44s/it]

Corrupted image for 620308


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [37:02<00:00,  1.42s/it]


valid_evaluation: loss=1.2746191259321709, acc=0.8437028953551239
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v4_016000_loss1.2746191259321709.pt
train_batch_loss[ 16010 ]:  0.8815294504165649
train_batch_loss[ 16020 ]:  1.0379096269607544
train_batch_loss[ 16030 ]:  3.151315927505493
train_batch_loss[ 16040 ]:  1.1413228511810303
train_batch_loss[ 16050 ]:  0.6514186859130859
train_batch_loss[ 16060 ]:  1.0221054553985596
train_batch_loss[ 16070 ]:  0.9209088683128357
train_batch_loss[ 16080 ]:  0.9657056927680969
train_batch_loss[ 16090 ]:  1.6169655323028564
train_batch_loss[ 16100 ]:  1.2811819314956665
train_batch_loss[ 16110 ]:  1.777127981185913
train_batch_loss[ 16120 ]:  0.792959451675415
train_batch_loss[ 16130 ]:  1.550952672958374
train_batch_loss[ 16140 ]:  0.73374342918396
train_batch_loss[ 16150 ]:  0.7667437195777893
train_batch_loss[ 16160 ]:  1.0384705066680908
train_batch_loss[ 16170 ]:  1.9162617921829224
train_batch_loss[ 16180 ]: 

train_batch_loss[ 17730 ]:  0.7583804130554199
train_batch_loss[ 17740 ]:  0.7295619249343872
train_batch_loss[ 17750 ]:  0.7912274599075317
train_batch_loss[ 17760 ]:  1.848656177520752
train_batch_loss[ 17770 ]:  1.6335058212280273
train_batch_loss[ 17780 ]:  0.8230836987495422
train_batch_loss[ 17790 ]:  0.6397532224655151
train_batch_loss[ 17800 ]:  1.6833720207214355
train_batch_loss[ 17810 ]:  0.5861645936965942
train_batch_loss[ 17820 ]:  1.415220856666565
train_batch_loss[ 17830 ]:  0.5965002179145813
train_batch_loss[ 17840 ]:  2.111299753189087
train_batch_loss[ 17850 ]:  1.038423776626587
train_batch_loss[ 17860 ]:  0.3570224642753601
train_batch_loss[ 17870 ]:  1.9320693016052246
train_batch_loss[ 17880 ]:  2.0351126194000244
train_batch_loss[ 17890 ]:  0.883965253829956
train_batch_loss[ 17900 ]:  1.1396024227142334
train_batch_loss[ 17910 ]:  1.709903597831726
train_batch_loss[ 17920 ]:  0.9379947185516357
train_batch_loss[ 17930 ]:  1.253065824508667
train_batch_loss[ 17

train_batch_loss[ 19490 ]:  1.8878053426742554
train_batch_loss[ 19500 ]:  1.4001671075820923
train_batch_loss[ 19510 ]:  0.7506187558174133
train_batch_loss[ 19520 ]:  1.8824795484542847
train_batch_loss[ 19530 ]:  0.8933290243148804
train_batch_loss[ 19540 ]:  0.8906881809234619
train_batch_loss[ 19550 ]:  1.3308544158935547
train_batch_loss[ 19560 ]:  0.4894939363002777
train_batch_loss[ 19570 ]:  0.8190426826477051
train_batch_loss[ 19580 ]:  0.9402557611465454
train_batch_loss[ 19590 ]:  1.0573638677597046
train_batch_loss[ 19600 ]:  1.432919979095459
train_batch_loss[ 19610 ]:  0.7465406656265259
train_batch_loss[ 19620 ]:  0.5580884218215942
train_batch_loss[ 19630 ]:  1.1778957843780518
train_batch_loss[ 19640 ]:  0.7368118762969971
train_batch_loss[ 19650 ]:  0.8497350215911865
train_batch_loss[ 19660 ]:  1.3173893690109253
train_batch_loss[ 19670 ]:  0.8793010711669922
train_batch_loss[ 19680 ]:  0.9474632740020752
train_batch_loss[ 19690 ]:  0.7492504119873047
train_batch_lo

Evaluate:  31%|█████████████████████▋                                               | 493/1568 [11:15<25:11,  1.41s/it]

Corrupted image for 620308


Evaluate:  37%|█████████████████████████▊                                           | 587/1568 [13:28<22:59,  1.41s/it]

Corrupted image for 467544


Evaluate:  55%|██████████████████████████████████████▏                              | 867/1568 [20:06<16:32,  1.42s/it]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [36:45<00:00,  1.41s/it]


valid_evaluation: loss=1.1683830385527212, acc=0.859780450311062
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v4_020000_loss1.1683830385527212.pt
train_batch_loss[ 20010 ]:  1.0592600107192993
train_batch_loss[ 20020 ]:  0.7161563634872437
train_batch_loss[ 20030 ]:  1.1520490646362305
train_batch_loss[ 20040 ]:  1.2394030094146729
train_batch_loss[ 20050 ]:  1.8266842365264893
train_batch_loss[ 20060 ]:  1.4768462181091309
train_batch_loss[ 20070 ]:  1.1347020864486694
train_batch_loss[ 20080 ]:  0.8773494362831116
train_batch_loss[ 20090 ]:  0.5319462418556213
train_batch_loss[ 20100 ]:  0.8944168090820312
train_batch_loss[ 20110 ]:  0.8837290406227112
train_batch_loss[ 20120 ]:  1.5137301683425903
train_batch_loss[ 20130 ]:  0.8203754425048828
train_batch_loss[ 20140 ]:  1.9442116022109985
train_batch_loss[ 20150 ]:  0.41714367270469666
train_batch_loss[ 20160 ]:  0.945157527923584
train_batch_loss[ 20170 ]:  0.288417249917984
train_batch_loss[ 20180

train_batch_loss[ 21720 ]:  1.168839454650879
train_batch_loss[ 21730 ]:  0.7149776816368103
train_batch_loss[ 21740 ]:  1.1935780048370361
train_batch_loss[ 21750 ]:  1.3009960651397705
train_batch_loss[ 21760 ]:  0.7056353092193604
train_batch_loss[ 21770 ]:  1.6159354448318481
train_batch_loss[ 21780 ]:  1.3086200952529907
train_batch_loss[ 21790 ]:  0.5393120050430298
train_batch_loss[ 21800 ]:  1.894417643547058
train_batch_loss[ 21810 ]:  0.6217003464698792
train_batch_loss[ 21820 ]:  1.4938104152679443
train_batch_loss[ 21830 ]:  3.098961353302002
train_batch_loss[ 21840 ]:  1.1701107025146484
train_batch_loss[ 21850 ]:  1.0624035596847534
train_batch_loss[ 21860 ]:  1.8342087268829346
train_batch_loss[ 21870 ]:  0.5851081013679504
train_batch_loss[ 21880 ]:  0.9252139925956726
train_batch_loss[ 21890 ]:  2.0091850757598877
train_batch_loss[ 21900 ]:  1.6331281661987305
train_batch_loss[ 21910 ]:  1.3600800037384033
train_batch_loss[ 21920 ]:  0.985822856426239
train_batch_loss[

train_batch_loss[ 23480 ]:  1.211047649383545
train_batch_loss[ 23490 ]:  2.594247817993164
train_batch_loss[ 23500 ]:  0.6103861331939697
train_batch_loss[ 23510 ]:  2.3740198612213135
train_batch_loss[ 23520 ]:  0.9430848956108093
train_batch_loss[ 23530 ]:  0.8292003870010376
train_batch_loss[ 23540 ]:  0.8243073225021362
train_batch_loss[ 23550 ]:  1.8426886796951294
train_batch_loss[ 23560 ]:  1.1235647201538086
train_batch_loss[ 23570 ]:  1.1065244674682617
train_batch_loss[ 23580 ]:  0.25863638520240784
train_batch_loss[ 23590 ]:  0.48037874698638916
train_batch_loss[ 23600 ]:  1.6995296478271484
train_batch_loss[ 23610 ]:  0.9645702838897705
train_batch_loss[ 23620 ]:  0.5070832967758179
train_batch_loss[ 23630 ]:  0.42630302906036377
train_batch_loss[ 23640 ]:  0.8787641525268555
train_batch_loss[ 23650 ]:  0.6220452785491943
train_batch_loss[ 23660 ]:  1.1810734272003174
train_batch_loss[ 23670 ]:  1.5161993503570557
train_batch_loss[ 23680 ]:  0.8974040746688843
train_batch_

Evaluate:  23%|████████████████                                                     | 364/1568 [08:16<27:59,  1.40s/it]

Corrupted image for 419552


Evaluate:  60%|█████████████████████████████████████████▌                           | 944/1568 [22:00<14:33,  1.40s/it]

Corrupted image for 620308


Evaluate:  60%|█████████████████████████████████████████▋                           | 948/1568 [22:06<14:25,  1.40s/it]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [36:51<00:00,  1.41s/it]


valid_evaluation: loss=1.0911384808359814, acc=0.8690562502647203
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v4_024000_loss1.0911384808359814.pt
train_batch_loss[ 24010 ]:  0.7568923234939575
train_batch_loss[ 24020 ]:  0.832162618637085
train_batch_loss[ 24030 ]:  0.49896523356437683
train_batch_loss[ 24040 ]:  0.6734939813613892
train_batch_loss[ 24050 ]:  0.9256829619407654
train_batch_loss[ 24060 ]:  0.509151816368103
train_batch_loss[ 24070 ]:  0.9618184566497803
train_batch_loss[ 24080 ]:  2.3026247024536133
train_batch_loss[ 24090 ]:  1.4655420780181885
train_batch_loss[ 24100 ]:  0.9174954891204834
train_batch_loss[ 24110 ]:  0.7397018671035767
train_batch_loss[ 24120 ]:  1.0655136108398438
train_batch_loss[ 24130 ]:  1.196335792541504
train_batch_loss[ 24140 ]:  1.5778582096099854
train_batch_loss[ 24150 ]:  0.6773404479026794
train_batch_loss[ 24160 ]:  0.8595031499862671
train_batch_loss[ 24170 ]:  0.8417375087738037
train_batch_loss[ 24180

train_batch_loss[ 25730 ]:  0.5038971900939941
train_batch_loss[ 25740 ]:  0.5871235728263855
train_batch_loss[ 25750 ]:  0.9933419823646545
train_batch_loss[ 25760 ]:  1.2274901866912842
train_batch_loss[ 25770 ]:  1.4721348285675049
train_batch_loss[ 25780 ]:  1.1916542053222656
train_batch_loss[ 25790 ]:  1.0579851865768433
train_batch_loss[ 25800 ]:  0.8540694117546082
train_batch_loss[ 25810 ]:  0.5165430307388306
train_batch_loss[ 25820 ]:  1.59998619556427
train_batch_loss[ 25830 ]:  0.8341892957687378
train_batch_loss[ 25840 ]:  1.2135061025619507
train_batch_loss[ 25850 ]:  0.6643280982971191
train_batch_loss[ 25860 ]:  3.0865654945373535
train_batch_loss[ 25870 ]:  0.8290296792984009
train_batch_loss[ 25880 ]:  0.6272144317626953
train_batch_loss[ 25890 ]:  1.4817004203796387
train_batch_loss[ 25900 ]:  0.907218337059021
train_batch_loss[ 25910 ]:  0.5662586688995361
train_batch_loss[ 25920 ]:  2.063709259033203
train_batch_loss[ 25930 ]:  1.4615225791931152
train_batch_loss[

train_batch_loss[ 27480 ]:  0.41587987542152405
train_batch_loss[ 27490 ]:  0.706707239151001
train_batch_loss[ 27500 ]:  1.666064739227295
train_batch_loss[ 27510 ]:  0.8566018342971802
train_batch_loss[ 27520 ]:  1.4555013179779053
train_batch_loss[ 27530 ]:  2.006683826446533
train_batch_loss[ 27540 ]:  1.0440354347229004
train_batch_loss[ 27550 ]:  0.49658697843551636
train_batch_loss[ 27560 ]:  0.4449540376663208
train_batch_loss[ 27570 ]:  0.9934808015823364
train_batch_loss[ 27580 ]:  0.8780418634414673
train_batch_loss[ 27590 ]:  1.274935007095337
train_batch_loss[ 27600 ]:  0.5475436449050903
train_batch_loss[ 27610 ]:  0.9627453088760376
train_batch_loss[ 27620 ]:  1.5087814331054688
train_batch_loss[ 27630 ]:  0.8906733393669128
train_batch_loss[ 27640 ]:  1.046166181564331
train_batch_loss[ 27650 ]:  1.4377784729003906
train_batch_loss[ 27660 ]:  1.4424151182174683
train_batch_loss[ 27670 ]:  1.1837565898895264
train_batch_loss[ 27680 ]:  0.7382032871246338
train_batch_loss

Evaluate:   7%|████▉                                                                | 111/1568 [02:32<32:33,  1.34s/it]

Corrupted image for 467544


Evaluate:  42%|█████████████████████████████▏                                       | 662/1568 [15:22<21:12,  1.40s/it]

Corrupted image for 419552


Evaluate:  60%|█████████████████████████████████████████▏                           | 935/1568 [21:51<14:46,  1.40s/it]

Corrupted image for 620308


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [36:53<00:00,  1.41s/it]


valid_evaluation: loss=1.046315777225071, acc=0.8752089733336323
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v4_028000_loss1.046315777225071.pt
train_batch_loss[ 28010 ]:  1.1455671787261963
train_batch_loss[ 28020 ]:  0.9069105386734009
train_batch_loss[ 28030 ]:  0.520601749420166
train_batch_loss[ 28040 ]:  0.6459288001060486
train_batch_loss[ 28050 ]:  1.2985332012176514
train_batch_loss[ 28060 ]:  0.4614044427871704
train_batch_loss[ 28070 ]:  0.7544516324996948
train_batch_loss[ 28080 ]:  2.083756923675537
train_batch_loss[ 28090 ]:  0.6142473220825195
train_batch_loss[ 28100 ]:  0.4491538405418396
train_batch_loss[ 28110 ]:  0.654760479927063
train_batch_loss[ 28120 ]:  1.3446617126464844
train_batch_loss[ 28130 ]:  1.4612029790878296
train_batch_loss[ 28140 ]:  1.1207010746002197
train_batch_loss[ 28150 ]:  0.8936185836791992
train_batch_loss[ 28160 ]:  1.1241614818572998
train_batch_loss[ 28170 ]:  0.9766456484794617
train_batch_loss[ 28180 ]:

train_batch_loss[ 29730 ]:  1.320411205291748
train_batch_loss[ 29740 ]:  1.2698674201965332
train_batch_loss[ 29750 ]:  0.13440145552158356
train_batch_loss[ 29760 ]:  0.28087541460990906
train_batch_loss[ 29770 ]:  1.2304327487945557
train_batch_loss[ 29780 ]:  0.6789864301681519
train_batch_loss[ 29790 ]:  1.7537357807159424
train_batch_loss[ 29800 ]:  0.7461385726928711
train_batch_loss[ 29810 ]:  0.6499950885772705
train_batch_loss[ 29820 ]:  0.6912811994552612
train_batch_loss[ 29830 ]:  0.8817577362060547
train_batch_loss[ 29840 ]:  0.371157705783844
train_batch_loss[ 29850 ]:  1.37943696975708
train_batch_loss[ 29860 ]:  0.6927722692489624
train_batch_loss[ 29870 ]:  0.5189571976661682
train_batch_loss[ 29880 ]:  0.8745607733726501
train_batch_loss[ 29890 ]:  1.0365968942642212
train_batch_loss[ 29900 ]:  1.084751009941101
train_batch_loss[ 29910 ]:  0.42968466877937317
train_batch_loss[ 29920 ]:  1.2912784814834595
train_batch_loss[ 29930 ]:  0.7261877059936523
train_batch_los

train_batch_loss[ 31480 ]:  1.128316044807434
train_batch_loss[ 31490 ]:  2.4863524436950684
train_batch_loss[ 31500 ]:  1.325146198272705
train_batch_loss[ 31510 ]:  1.2269136905670166
train_batch_loss[ 31520 ]:  1.2128708362579346
train_batch_loss[ 31530 ]:  0.8089101910591125
train_batch_loss[ 31540 ]:  0.7620035409927368
train_batch_loss[ 31550 ]:  0.411197304725647
train_batch_loss[ 31560 ]:  1.45510995388031
train_batch_loss[ 31570 ]:  0.3672226071357727
train_batch_loss[ 31580 ]:  1.029029130935669
Corrupted image for 1069418
train_batch_loss[ 31590 ]:  1.2250654697418213
train_batch_loss[ 31600 ]:  0.3529466986656189
train_batch_loss[ 31610 ]:  0.736397385597229
train_batch_loss[ 31620 ]:  0.6769452095031738
train_batch_loss[ 31630 ]:  1.512371301651001
train_batch_loss[ 31640 ]:  1.3640189170837402
train_batch_loss[ 31650 ]:  1.2155942916870117
train_batch_loss[ 31660 ]:  0.9176876544952393
train_batch_loss[ 31670 ]:  0.4307023882865906
train_batch_loss[ 31680 ]:  0.3114784359

Evaluate:  65%|████████████████████████████████████████████▍                       | 1026/1568 [24:04<12:50,  1.42s/it]

Corrupted image for 620308


Evaluate:  79%|█████████████████████████████████████████████████████▌              | 1234/1568 [28:59<07:50,  1.41s/it]

Corrupted image for 467544


Evaluate:  94%|████████████████████████████████████████████████████████████████▏   | 1480/1568 [34:51<02:02,  1.39s/it]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [36:56<00:00,  1.41s/it]


valid_evaluation: loss=0.9620996700228386, acc=0.8836451427247382
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v4_032000_loss0.9620996700228386.pt
train_batch_loss[ 32010 ]:  0.6510888934135437
train_batch_loss[ 32020 ]:  1.4479916095733643
train_batch_loss[ 32030 ]:  0.9388759732246399
train_batch_loss[ 32040 ]:  1.4424680471420288
train_batch_loss[ 32050 ]:  1.1269235610961914
train_batch_loss[ 32060 ]:  0.5553721189498901
train_batch_loss[ 32070 ]:  1.1926536560058594
train_batch_loss[ 32080 ]:  1.0680854320526123
train_batch_loss[ 32090 ]:  0.934040904045105
train_batch_loss[ 32100 ]:  0.11037808656692505
train_batch_loss[ 32110 ]:  0.5916529893875122
train_batch_loss[ 32120 ]:  1.2502155303955078
train_batch_loss[ 32130 ]:  0.6393215656280518
train_batch_loss[ 32140 ]:  0.5094942450523376
train_batch_loss[ 32150 ]:  0.46267467737197876
train_batch_loss[ 32160 ]:  1.1077786684036255
train_batch_loss[ 32170 ]:  1.5681322813034058
train_batch_loss[ 32

train_batch_loss[ 33720 ]:  1.2727960348129272
train_batch_loss[ 33730 ]:  0.6030315160751343
train_batch_loss[ 33740 ]:  0.3684017062187195
train_batch_loss[ 33750 ]:  1.9225295782089233
train_batch_loss[ 33760 ]:  1.426461935043335
train_batch_loss[ 33770 ]:  1.4024685621261597
train_batch_loss[ 33780 ]:  1.0891923904418945
train_batch_loss[ 33790 ]:  0.7503947019577026
train_batch_loss[ 33800 ]:  1.7185474634170532
train_batch_loss[ 33810 ]:  0.9990147948265076
train_batch_loss[ 33820 ]:  1.983872890472412
train_batch_loss[ 33830 ]:  0.9504688382148743
train_batch_loss[ 33840 ]:  0.47486937046051025
train_batch_loss[ 33850 ]:  1.2433816194534302
train_batch_loss[ 33860 ]:  0.856120228767395
train_batch_loss[ 33870 ]:  0.48118171095848083
train_batch_loss[ 33880 ]:  2.071601629257202
train_batch_loss[ 33890 ]:  0.580980122089386
train_batch_loss[ 33900 ]:  1.5085004568099976
train_batch_loss[ 33910 ]:  0.8640488386154175
train_batch_loss[ 33920 ]:  0.6918427348136902
train_batch_loss

train_batch_loss[ 35470 ]:  1.222914218902588
train_batch_loss[ 35480 ]:  1.4378273487091064
train_batch_loss[ 35490 ]:  0.5618899464607239
train_batch_loss[ 35500 ]:  0.9565228223800659
train_batch_loss[ 35510 ]:  1.8209542036056519
train_batch_loss[ 35520 ]:  1.256354570388794
train_batch_loss[ 35530 ]:  1.74928617477417
train_batch_loss[ 35540 ]:  0.8676690459251404
train_batch_loss[ 35550 ]:  0.4099070429801941
train_batch_loss[ 35560 ]:  0.7240078449249268
train_batch_loss[ 35570 ]:  0.6240538358688354
train_batch_loss[ 35580 ]:  1.4070003032684326
train_batch_loss[ 35590 ]:  0.7076072096824646
train_batch_loss[ 35600 ]:  1.4061739444732666
train_batch_loss[ 35610 ]:  0.434579998254776
train_batch_loss[ 35620 ]:  0.9449865221977234
train_batch_loss[ 35630 ]:  0.824094295501709
train_batch_loss[ 35640 ]:  1.3431538343429565
train_batch_loss[ 35650 ]:  1.4357088804244995
train_batch_loss[ 35660 ]:  1.7796039581298828
train_batch_loss[ 35670 ]:  2.0506644248962402
train_batch_loss[ 3

Evaluate:  93%|███████████████████████████████████████████████████████████████▌    | 1465/1568 [34:34<02:36,  1.52s/it]

Corrupted image for 620308


Evaluate:  98%|██████████████████████████████████████████████████████████████████▊ | 1540/1568 [36:21<00:40,  1.43s/it]

Corrupted image for 467544


Evaluate:  99%|███████████████████████████████████████████████████████████████████▍| 1556/1568 [36:43<00:17,  1.42s/it]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [37:00<00:00,  1.42s/it]


valid_evaluation: loss=0.9482514772701548, acc=0.8870435287405293
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v4_036000_loss0.9482514772701548.pt
train_batch_loss[ 36010 ]:  0.848118245601654
train_batch_loss[ 36020 ]:  1.4538928270339966
train_batch_loss[ 36030 ]:  0.6547622084617615
train_batch_loss[ 36040 ]:  0.7061010599136353
train_batch_loss[ 36050 ]:  0.9887065887451172
train_batch_loss[ 36060 ]:  1.7464205026626587
train_batch_loss[ 36070 ]:  0.8734697103500366
train_batch_loss[ 36080 ]:  0.4703316390514374
train_batch_loss[ 36090 ]:  0.48885810375213623
train_batch_loss[ 36100 ]:  1.3037772178649902
train_batch_loss[ 36110 ]:  0.9719893336296082
train_batch_loss[ 36120 ]:  0.21647050976753235
train_batch_loss[ 36130 ]:  0.9411032199859619
train_batch_loss[ 36140 ]:  0.6346971988677979
train_batch_loss[ 36150 ]:  0.23428255319595337
train_batch_loss[ 36160 ]:  0.7142961025238037
train_batch_loss[ 36170 ]:  1.5100992918014526
train_batch_loss[ 3

train_batch_loss[ 37720 ]:  0.5128768086433411
train_batch_loss[ 37730 ]:  0.6953520774841309
train_batch_loss[ 37740 ]:  0.840944230556488
train_batch_loss[ 37750 ]:  0.2912856936454773
train_batch_loss[ 37760 ]:  1.9295233488082886
train_batch_loss[ 37770 ]:  0.5595024824142456
train_batch_loss[ 37780 ]:  1.1535258293151855
train_batch_loss[ 37790 ]:  0.896924614906311
train_batch_loss[ 37800 ]:  0.8572982549667358
train_batch_loss[ 37810 ]:  1.256028652191162
train_batch_loss[ 37820 ]:  0.17025253176689148
train_batch_loss[ 37830 ]:  1.1865615844726562
train_batch_loss[ 37840 ]:  0.545132040977478
train_batch_loss[ 37850 ]:  1.2871639728546143
train_batch_loss[ 37860 ]:  0.8234564065933228
train_batch_loss[ 37870 ]:  0.38284832239151
train_batch_loss[ 37880 ]:  1.0871349573135376
train_batch_loss[ 37890 ]:  1.6348023414611816
train_batch_loss[ 37900 ]:  2.43050217628479
train_batch_loss[ 37910 ]:  0.40800878405570984
train_batch_loss[ 37920 ]:  0.32987067103385925
train_batch_loss[ 

train_batch_loss[ 39470 ]:  0.5997105836868286
train_batch_loss[ 39480 ]:  0.23420503735542297
train_batch_loss[ 39490 ]:  1.774496078491211
train_batch_loss[ 39500 ]:  0.4454326331615448
train_batch_loss[ 39510 ]:  1.1201167106628418
train_batch_loss[ 39520 ]:  1.774834394454956
train_batch_loss[ 39530 ]:  1.2788617610931396
train_batch_loss[ 39540 ]:  0.39084869623184204
train_batch_loss[ 39550 ]:  0.6877830028533936
train_batch_loss[ 39560 ]:  1.4367462396621704
train_batch_loss[ 39570 ]:  0.5250895023345947
train_batch_loss[ 39580 ]:  0.6594809889793396
train_batch_loss[ 39590 ]:  0.5872838497161865
train_batch_loss[ 39600 ]:  0.9038103818893433
train_batch_loss[ 39610 ]:  1.6385602951049805
train_batch_loss[ 39620 ]:  2.288339614868164
train_batch_loss[ 39630 ]:  0.9192866086959839
train_batch_loss[ 39640 ]:  0.5606521964073181
train_batch_loss[ 39650 ]:  0.262844055891037
train_batch_loss[ 39660 ]:  1.2145240306854248
train_batch_loss[ 39670 ]:  0.663723349571228
train_batch_loss

Evaluate:   8%|█████▎                                                               | 122/1568 [02:46<33:21,  1.38s/it]

Corrupted image for 467544


Evaluate:  12%|████████▍                                                            | 191/1568 [04:22<32:54,  1.43s/it]

Corrupted image for 620308


Evaluate:  60%|█████████████████████████████████████████▏                           | 935/1568 [21:57<14:58,  1.42s/it]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [36:57<00:00,  1.41s/it]

valid_evaluation: loss=0.9095548662792806, acc=0.8900955982928342
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v4_040000_loss0.9095548662792806.pt
train_loss:  2.3544757948610933





In [42]:
valid_history2 = []
for i in range(len(valid_history)):
    evaluation = {
        'loss': valid_history[i]['loss'],
        'acc': valid_history[i]['acc'],
    }
    valid_history2.append(evaluation)
valid_history2

[{'loss': 2.5910161001522902, 'acc': 0.7123492962799632},
 {'loss': 1.8641845750114072, 'acc': 0.7844603567308722},
 {'loss': 1.4699078681275808, 'acc': 0.8244325019246724},
 {'loss': 1.2746191259321709, 'acc': 0.8437028953551239},
 {'loss': 1.1683830385527212, 'acc': 0.859780450311062},
 {'loss': 1.0911384808359814, 'acc': 0.8690562502647203},
 {'loss': 1.046315777225071, 'acc': 0.8752089733336323},
 {'loss': 0.9620996700228386, 'acc': 0.8836451427247382},
 {'loss': 0.9482514772701548, 'acc': 0.8870435287405293},
 {'loss': 0.9095548662792806, 'acc': 0.8900955982928342}]

In [43]:

import os
import pickle

 
with open('D:/lab2/深度学习课程设计/crnn-pytorch-master/train_history/mobileVit_rnn_3_loss', 'wb') as fp:
    pickle.dump(loss_history, fp)
     
with open('D:/lab2/深度学习课程设计/crnn-pytorch-master/train_history/mobileVit_rnn_3_valid', 'wb') as fp:
    pickle.dump(valid_history2, fp)

In [44]:
with open ('D:/lab2/深度学习课程设计/crnn-pytorch-master/train_history/mobileVit_rnn_3_loss', 'rb') as fp:
    temp = pickle.load(fp)
temp

[26.32318115234375,
 25.92038917541504,
 25.77983856201172,
 26.687671661376953,
 24.70000457763672,
 25.67837142944336,
 25.52066421508789,
 25.48587989807129,
 25.60915756225586,
 27.051166534423828,
 24.26970863342285,
 23.374496459960938,
 26.735599517822266,
 24.747140884399414,
 23.061668395996094,
 24.722734451293945,
 23.789003372192383,
 24.68138313293457,
 22.923885345458984,
 25.117904663085938,
 25.955337524414062,
 23.483539581298828,
 24.428390502929688,
 25.73165512084961,
 23.33498191833496,
 24.1345157623291,
 25.130659103393555,
 24.80514907836914,
 26.1441650390625,
 23.992998123168945,
 24.118572235107422,
 25.045103073120117,
 24.433238983154297,
 24.65667724609375,
 23.566417694091797,
 23.89128875732422,
 25.205039978027344,
 23.609172821044922,
 23.55706024169922,
 23.761512756347656,
 22.978084564208984,
 23.42521858215332,
 23.377002716064453,
 26.37904930114746,
 24.741548538208008,
 22.946739196777344,
 23.64943504333496,
 23.830875396728516,
 22.28328704833

# mobilevit v2

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import timm

class MobileVit_improv(nn.Module):
    def __init__(self,img_channel,  img_height, img_width,num_class,rnn_hidden=256):
        super(MobileVit_improv, self).__init__()
        self.model = timm.create_model('mobilevit_s.cvnets_in1k', pretrained=False,features_only=True)
        self.model.stem.conv = nn.Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        
        map_to_seq_hidden = 1000
        #self.model.head.fc = nn.Linear(self.model.head.in_features, map_to_seq)
        output_channel, output_height, output_width = self._compute_output_shape(img_channel, img_height, img_width)
        
        self.map_to_seq = nn.Linear(output_channel * output_height, map_to_seq_hidden)
        self.rnn1 = nn.LSTM(map_to_seq_hidden, rnn_hidden, bidirectional=True)
        self.rnn2 = nn.LSTM(2 * rnn_hidden, rnn_hidden, bidirectional=True)

        self.dense = nn.Linear(2 * rnn_hidden, num_class)
        
    
    
    def forward(self, x):
        x = self.model(x)[1]
        #print(x)
        batch, channel, height, width = x.size()
        #print('batch, channel, height, width')
        #print(batch, channel, height, width)
        x = x.view(batch, channel * height, width)
        x = x.permute(2, 0, 1)  # (width, batch, feature)
        #print(x.size())
        seq = self.map_to_seq(x)

        recurrent, _ = self.rnn1(seq)
        recurrent, _ = self.rnn2(recurrent)

        output = self.dense(recurrent)
        return output  # shape: (seq_len, batch, num_class)
        

    def _compute_output_shape(self,img_channel,  img_height, img_width):
        dummy_input = torch.randn(1, img_channel, img_height, img_width)
        dummy_output = self.model(dummy_input)
        #print(len(dummy_output))
        
        _, channels, height, width = dummy_output[1].size()
        #print('hihi')
        #print(channels, height, width)
        return channels, height, width

In [16]:
num_class = len(Synth90kDataset.LABEL2CHAR) + 1
model = MobileVit_improv(1,32,100,num_class)
#model.load_state_dict(torch.load('D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobileVit_chars74k_2.pth'),strict=False)
dummy_input = torch.randn(5, 1, 32, 100)
dummy_output =model(dummy_input)
dummy_output.size()

torch.Size([25, 5, 512])


torch.Size([25, 5, 37])

In [20]:

epochs = config['epochs']
train_batch_size = config['train_batch_size']
eval_batch_size = config['eval_batch_size']
lr = config['lr']
show_interval = config['show_interval']
valid_interval = config['valid_interval']
save_interval = config['save_interval']
end_interval = 40000
cpu_workers = config['cpu_workers']
reload_checkpoint = config['reload_checkpoint']
valid_max_iter = config['valid_max_iter']

img_width = config['img_width']
img_height = config['img_height']
img_channel = config['img_channel']
data_dir = config['data_dir']
#print(img_width,img_height)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

train_dataset = Synth90kDataset(root_dir=data_dir, mode='train',
                                img_height=img_height, img_width=img_width)
valid_dataset = Synth90kDataset(root_dir=data_dir, mode='dev',
                                img_height=img_height, img_width=img_width)

print(valid_dataset.__len__())
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=eval_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)

num_class = len(Synth90kDataset.LABEL2CHAR) + 1
'''
crnn = CRNN(1, img_height, img_width, num_class,
            map_to_seq_hidden=config['map_to_seq_hidden'],
            rnn_hidden=config['rnn_hidden'],
            leaky_relu=config['leaky_relu'])
if reload_checkpoint:
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
'''  
    
crnn = MobileVit_improv(1,32,100,num_class)
#crnn.load_state_dict(torch.load('D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobileVit_chars74k_2.pth'),strict=False)


crnn.to(device)

optimizer = optim.Adam(crnn.parameters())
criterion = CTCLoss(reduction='sum', zero_infinity=True)
criterion.to(device)
loss_history=[]
valid_history=[]
assert save_interval % valid_interval == 0
i = 1
for epoch in range(1, epochs + 1):
    print(f'epoch: {epoch}')
    tot_train_loss = 0.
    tot_train_count = 0
    for train_data in train_loader:
        loss = train_batch(crnn, train_data, optimizer, criterion, device)
        train_size = train_data[0].size(0)

        tot_train_loss += loss
        tot_train_count += train_size
        if i % show_interval == 0:
            print('train_batch_loss[', i, ']: ', loss / train_size)
            loss_history.append(loss / train_size)

        if i % valid_interval == 0:
            evaluation = evaluate(crnn, valid_loader, criterion,
                                  decode_method=config['decode_method'],
                                  beam_size=config['beam_size'])
            print('valid_evaluation: loss={loss}, acc={acc}'.format(**evaluation))
            valid_history.append(evaluation)
            if i % save_interval == 0:
                prefix = 'mobilevit_rnn_v2'
                loss = evaluation['loss']
                save_model_path = os.path.join(config['checkpoints_dir'],
                                               f'{prefix}_{i:06}_loss{loss}.pt')
                torch.save(crnn.state_dict(), save_model_path)
                print('save model at ', save_model_path)

        if i % end_interval == 0:
            break
        i += 1

    print('train_loss: ', tot_train_loss / tot_train_count)

device: cuda
802734
epoch: 1
train_batch_loss[ 10 ]:  27.554855346679688
train_batch_loss[ 20 ]:  25.780851364135742
train_batch_loss[ 30 ]:  25.15695571899414
train_batch_loss[ 40 ]:  25.935163497924805
train_batch_loss[ 50 ]:  24.611080169677734
train_batch_loss[ 60 ]:  26.569761276245117
train_batch_loss[ 70 ]:  25.440114974975586
train_batch_loss[ 80 ]:  24.095073699951172
train_batch_loss[ 90 ]:  24.965740203857422
train_batch_loss[ 100 ]:  24.04668426513672
train_batch_loss[ 110 ]:  25.298580169677734
train_batch_loss[ 120 ]:  23.279333114624023
train_batch_loss[ 130 ]:  25.71746826171875
train_batch_loss[ 140 ]:  23.87799072265625
train_batch_loss[ 150 ]:  24.745609283447266
train_batch_loss[ 160 ]:  23.414257049560547
train_batch_loss[ 170 ]:  24.748353958129883
train_batch_loss[ 180 ]:  26.4576416015625
train_batch_loss[ 190 ]:  24.68633270263672
train_batch_loss[ 200 ]:  23.498371124267578
train_batch_loss[ 210 ]:  26.11236572265625
train_batch_loss[ 220 ]:  24.86026763916015

train_batch_loss[ 1840 ]:  4.224534034729004
train_batch_loss[ 1850 ]:  5.8397040367126465
train_batch_loss[ 1860 ]:  5.246426582336426
train_batch_loss[ 1870 ]:  6.962961196899414
train_batch_loss[ 1880 ]:  5.629364967346191
train_batch_loss[ 1890 ]:  8.11369514465332
train_batch_loss[ 1900 ]:  7.607270240783691
train_batch_loss[ 1910 ]:  5.176774978637695
train_batch_loss[ 1920 ]:  6.600076675415039
train_batch_loss[ 1930 ]:  6.745774269104004
train_batch_loss[ 1940 ]:  6.736330509185791
train_batch_loss[ 1950 ]:  4.695995330810547
train_batch_loss[ 1960 ]:  6.261013031005859
train_batch_loss[ 1970 ]:  5.013923645019531
train_batch_loss[ 1980 ]:  6.037240505218506
train_batch_loss[ 1990 ]:  3.889737606048584
train_batch_loss[ 2000 ]:  6.012662887573242
train_batch_loss[ 2010 ]:  5.808933258056641
train_batch_loss[ 2020 ]:  6.453112602233887
train_batch_loss[ 2030 ]:  6.6321516036987305
train_batch_loss[ 2040 ]:  6.145816326141357
train_batch_loss[ 2050 ]:  6.223575592041016
train_bat

train_batch_loss[ 3650 ]:  5.259361267089844
train_batch_loss[ 3660 ]:  4.31160831451416
train_batch_loss[ 3670 ]:  4.430137634277344
train_batch_loss[ 3680 ]:  4.143223285675049
train_batch_loss[ 3690 ]:  3.4304604530334473
train_batch_loss[ 3700 ]:  5.000253200531006
train_batch_loss[ 3710 ]:  6.214939117431641
train_batch_loss[ 3720 ]:  4.4908318519592285
train_batch_loss[ 3730 ]:  4.182753562927246
train_batch_loss[ 3740 ]:  3.543653964996338
train_batch_loss[ 3750 ]:  3.5838074684143066
train_batch_loss[ 3760 ]:  4.597156524658203
train_batch_loss[ 3770 ]:  4.551187992095947
train_batch_loss[ 3780 ]:  3.0707640647888184
train_batch_loss[ 3790 ]:  5.127890586853027
train_batch_loss[ 3800 ]:  3.7435190677642822
train_batch_loss[ 3810 ]:  2.4017493724823
train_batch_loss[ 3820 ]:  2.950615167617798
train_batch_loss[ 3830 ]:  4.386658191680908
train_batch_loss[ 3840 ]:  3.2031984329223633
train_batch_loss[ 3850 ]:  3.540591239929199
train_batch_loss[ 3860 ]:  4.437498092651367
train_b

Evaluate:  23%|████████████████                                                     | 365/1568 [05:19<16:27,  1.22it/s]

Corrupted image for 620308


Evaluate:  75%|███████████████████████████████████████████████████                 | 1178/1568 [16:30<05:06,  1.27it/s]

Corrupted image for 419552


Evaluate:  97%|██████████████████████████████████████████████████████████████████▏ | 1527/1568 [21:14<00:32,  1.25it/s]

Corrupted image for 467544


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [21:47<00:00,  1.20it/s]


valid_evaluation: loss=4.138405747594656, acc=0.5554965904023001
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v2_004000_loss4.138405747594656.pt
train_batch_loss[ 4010 ]:  3.6629106998443604
train_batch_loss[ 4020 ]:  3.4667038917541504
train_batch_loss[ 4030 ]:  3.4923367500305176
train_batch_loss[ 4040 ]:  5.271482944488525
train_batch_loss[ 4050 ]:  5.222695827484131
train_batch_loss[ 4060 ]:  3.5541415214538574
train_batch_loss[ 4070 ]:  5.642711639404297
train_batch_loss[ 4080 ]:  5.274589538574219
train_batch_loss[ 4090 ]:  4.342421531677246
train_batch_loss[ 4100 ]:  5.056007385253906
train_batch_loss[ 4110 ]:  4.086896896362305
train_batch_loss[ 4120 ]:  3.65118670463562
train_batch_loss[ 4130 ]:  4.562140464782715
train_batch_loss[ 4140 ]:  5.341770648956299
train_batch_loss[ 4150 ]:  3.4934678077697754
train_batch_loss[ 4160 ]:  2.9239771366119385
train_batch_loss[ 4170 ]:  3.5671515464782715
train_batch_loss[ 4180 ]:  3.231551170349121
train_

train_batch_loss[ 5790 ]:  3.347609281539917
train_batch_loss[ 5800 ]:  3.5043113231658936
train_batch_loss[ 5810 ]:  3.898099660873413
train_batch_loss[ 5820 ]:  3.601320266723633
train_batch_loss[ 5830 ]:  4.8231048583984375
train_batch_loss[ 5840 ]:  3.100851058959961
train_batch_loss[ 5850 ]:  4.039257526397705
train_batch_loss[ 5860 ]:  2.9872536659240723
train_batch_loss[ 5870 ]:  3.966705799102783
train_batch_loss[ 5880 ]:  3.932114601135254
train_batch_loss[ 5890 ]:  4.477919101715088
train_batch_loss[ 5900 ]:  3.566999912261963
train_batch_loss[ 5910 ]:  3.8292593955993652
train_batch_loss[ 5920 ]:  2.9800825119018555
train_batch_loss[ 5930 ]:  4.122422218322754
train_batch_loss[ 5940 ]:  2.583533763885498
train_batch_loss[ 5950 ]:  4.907719135284424
train_batch_loss[ 5960 ]:  3.4106898307800293
train_batch_loss[ 5970 ]:  2.578031539916992
train_batch_loss[ 5980 ]:  3.9979920387268066
train_batch_loss[ 5990 ]:  2.7318267822265625
train_batch_loss[ 6000 ]:  4.874773979187012
tr

train_batch_loss[ 7600 ]:  4.313961982727051
train_batch_loss[ 7610 ]:  2.8389620780944824
train_batch_loss[ 7620 ]:  2.9023842811584473
train_batch_loss[ 7630 ]:  3.900123119354248
train_batch_loss[ 7640 ]:  2.6204540729522705
train_batch_loss[ 7650 ]:  2.7340571880340576
train_batch_loss[ 7660 ]:  3.0386528968811035
train_batch_loss[ 7670 ]:  2.2296395301818848
train_batch_loss[ 7680 ]:  3.08601713180542
train_batch_loss[ 7690 ]:  3.891648292541504
train_batch_loss[ 7700 ]:  2.604327440261841
train_batch_loss[ 7710 ]:  2.157949447631836
train_batch_loss[ 7720 ]:  4.362321853637695
train_batch_loss[ 7730 ]:  2.2718472480773926
train_batch_loss[ 7740 ]:  4.9485859870910645
train_batch_loss[ 7750 ]:  3.8876442909240723
train_batch_loss[ 7760 ]:  2.770796775817871
train_batch_loss[ 7770 ]:  3.976685047149658
train_batch_loss[ 7780 ]:  2.427100658416748
train_batch_loss[ 7790 ]:  3.522014617919922
train_batch_loss[ 7800 ]:  2.47849178314209
train_batch_loss[ 7810 ]:  2.7051315307617188
tr

Evaluate:  11%|███████▎                                                             | 166/1568 [02:05<18:07,  1.29it/s]

Corrupted image for 467544


Evaluate:  29%|███████████████████▋                                                 | 448/1568 [05:41<14:06,  1.32it/s]

Corrupted image for 419552


Evaluate:  54%|█████████████████████████████████████▏                               | 846/1568 [10:46<08:59,  1.34it/s]

Corrupted image for 620308


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [19:55<00:00,  1.31it/s]


valid_evaluation: loss=3.343903567589988, acc=0.6249965742076453
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v2_008000_loss3.343903567589988.pt
train_batch_loss[ 8010 ]:  2.556375503540039
train_batch_loss[ 8020 ]:  3.3300390243530273
train_batch_loss[ 8030 ]:  3.0454397201538086
train_batch_loss[ 8040 ]:  5.355524063110352
train_batch_loss[ 8050 ]:  3.520481824874878
train_batch_loss[ 8060 ]:  3.9062724113464355
train_batch_loss[ 8070 ]:  2.982847213745117
train_batch_loss[ 8080 ]:  3.831601858139038
train_batch_loss[ 8090 ]:  3.3890154361724854
train_batch_loss[ 8100 ]:  4.26798677444458
train_batch_loss[ 8110 ]:  4.712113380432129
train_batch_loss[ 8120 ]:  3.913109064102173
train_batch_loss[ 8130 ]:  3.166780471801758
train_batch_loss[ 8140 ]:  4.569340229034424
train_batch_loss[ 8150 ]:  4.004302024841309
train_batch_loss[ 8160 ]:  4.250143527984619
train_batch_loss[ 8170 ]:  3.266453266143799
train_batch_loss[ 8180 ]:  2.507626533508301
train_bat

KeyboardInterrupt: 

In [12]:

import os
import pickle

 
with open('D:/lab2/深度学习课程设计/crnn-pytorch-master/train_history/mobileVit_rnn_loss', 'wb') as fp:
    pickle.dump([loss_history,valid_history], fp)

MemoryError: 

In [None]:
with open ('D:/lab2/深度学习课程设计/crnn-pytorch-master/train_history/mobileVit_rnn_1', 'rb') as fp:
    temp = pickle.load(fp)
temp

In [None]:
pytorch_total_params = sum(p.numel() for p in crnn.parameters())
pytorch_total_params

In [13]:
loss_history

[24.15996551513672,
 23.147846221923828,
 25.26816177368164,
 23.757020950317383,
 22.980716705322266,
 24.600717544555664,
 23.73973274230957,
 23.625492095947266,
 25.49618911743164,
 23.96426010131836,
 23.72730255126953,
 23.053531646728516,
 24.715412139892578,
 24.980304718017578,
 24.164417266845703,
 24.762134552001953,
 22.56833839416504,
 23.74498748779297,
 23.180675506591797,
 23.6967830657959,
 22.54412841796875,
 23.37295150756836,
 21.49481773376465,
 23.705638885498047,
 20.418201446533203,
 23.384031295776367,
 21.797086715698242,
 22.812301635742188,
 20.524314880371094,
 21.212459564208984,
 23.029254913330078,
 21.234966278076172,
 21.494609832763672,
 21.280935287475586,
 22.542755126953125,
 21.828720092773438,
 21.478775024414062,
 19.28566551208496,
 20.738784790039062,
 20.330886840820312,
 20.08675765991211,
 17.75045394897461,
 18.13392448425293,
 18.938695907592773,
 20.794984817504883,
 18.07672119140625,
 18.164703369140625,
 18.431640625,
 15.343786239624

# mobileVit special

In [13]:
class MobileViT_RNN(nn.Module):
    def __init__(self, image_size, dims, channels, num_classes, expansion=4, kernel_size=3, patch_size=(2, 2)):
        super().__init__()
        ih, iw = image_size
        ph, pw = patch_size
        assert ih % ph == 0 and iw % pw == 0

        L = [2, 4, 3]
        rnn_hidden = 256
        self.conv1 = conv_nxn_bn(1, channels[0], stride=2)
                
        '0    1   2     3   4     5    6    7'      
        '[16, 64, 128, 128, 256, 256, 512,512]'
            
        self.mv2 = nn.ModuleList([])
        self.mv2.append(MV2Block(channels[0], channels[1], 1, expansion)) #0
        self.mv2.append(MV2Block(channels[1], channels[2], 2, expansion)) #1
        #vit
        self.mv2.append(MV2Block(channels[3], channels[4], 2, expansion)) #2
        #vit
        self.mv2.append(MV2Block(channels[5], channels[6], 2, expansion)) #3
        #vit
        
        self.mvit = nn.ModuleList([])
        self.mvit.append(MobileViTBlock(dims[0], L[0], channels[2], kernel_size, patch_size, int(dims[0]*2)))
        self.mvit.append(MobileViTBlock(dims[1], L[1], channels[4], kernel_size, patch_size, int(dims[1]*4)))
        self.mvit.append(MobileViTBlock(dims[2], L[2], channels[6], kernel_size, patch_size, int(dims[2]*4)))

        self.conv2 = conv_1x1_bn(channels[-2], channels[-1])

        self.pool = nn.AvgPool2d(ih//32, 1)
        self.fc = nn.Linear(channels[-1], num_classes, bias=False)
        
        map_to_seq_hidden = 1000
        #self.model.head.fc = nn.Linear(self.model.head.in_features, map_to_seq)
        output_channel, output_height, output_width = self._compute_output_shape(1, image_size[1], image_size[0])
        
        self.map_to_seq = nn.Linear(output_channel * output_height, map_to_seq_hidden)
        self.rnn1 = nn.LSTM(map_to_seq_hidden, rnn_hidden, bidirectional=True)
        self.rnn2 = nn.LSTM(2 * rnn_hidden, rnn_hidden, bidirectional=True)

        self.dense = nn.Linear(2 * rnn_hidden, num_class)
        
    def backbone_forward(self,x):
        x = self.conv1(x)
        x = self.mv2[0](x)
        x = self.mv2[1](x)
        #print(x.shape)
        x = self.mvit[0](x)
        #print(x.shape)
        x = self.mv2[2](x)
        x = self.mvit[1](x)
        x = self.mv2[3](x)
        x = self.mvit[2](x)
        
        
        x = self.conv2(x)
        return x

    def forward(self, x):
        
        #x = self.pool(x).view(-1, x.shape[1])
        x = self.backbone_forward(x)
        batch, channel, height, width = x.size()
        #print('batch, channel, height, width')
        #print(batch, channel, height, width)
        x = x.view(batch, channel * height, width)
        x = x.permute(2, 0, 1)  # (width, batch, feature)
        #print(x.size())
        seq = self.map_to_seq(x)

        recurrent, _ = self.rnn1(seq)
        recurrent, _ = self.rnn2(recurrent)

        output = self.dense(recurrent)
        #x = self.fc(x)
        return output
    
    
    def _compute_output_shape(self,img_channels,  img_width, img_height):
        #print(img_channels,  img_height, img_width)
        dummy_input = torch.randn(1, img_channels, img_width, img_height)
        dummy_output = self.backbone_forward(dummy_input)
        #print(dummy_output.size())
        
        _, channels, width, height = dummy_output.size()
        
        return channels, height, width

In [14]:
num_class = 37
dims = [64, 80, 96]
channels = [16, 32, 64, 64, 128, 128, 256,256]
model = MobileViT_RNN((32, 128), dims, channels, num_classes = num_class, expansion=2)

dummy_input = torch.randn(5, 1, 32, 128)
dummy_output =model(dummy_input)
dummy_output.size()

torch.Size([1, 64, 32, 8])
torch.Size([1, 64, 32, 8])
torch.Size([5, 64, 8, 32])
torch.Size([5, 64, 8, 32])


torch.Size([8, 5, 37])

In [63]:
pytorch_total_params = sum(p.numel() for p in model.parameters())
pytorch_total_params

7862269

In [58]:

epochs = config['epochs']
train_batch_size = config['train_batch_size']
eval_batch_size = config['eval_batch_size']
lr = config['lr']
show_interval = config['show_interval']
valid_interval = config['valid_interval']
save_interval = config['save_interval']
end_interval = 40000
cpu_workers = config['cpu_workers']
reload_checkpoint = config['reload_checkpoint']
valid_max_iter = config['valid_max_iter']

img_width = 128
img_height = 32
img_channel = config['img_channel']
data_dir = config['data_dir']
#print(img_width,img_height)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

train_dataset = Synth90kDataset(root_dir=data_dir, mode='train',
                                img_height=img_height, img_width=img_width)
valid_dataset = Synth90kDataset(root_dir=data_dir, mode='dev',
                                img_height=img_height, img_width=img_width)

print(valid_dataset.__len__())
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=eval_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)

num_class = len(Synth90kDataset.LABEL2CHAR) + 1
'''
crnn = CRNN(1, img_height, img_width, num_class,
            map_to_seq_hidden=config['map_to_seq_hidden'],
            rnn_hidden=config['rnn_hidden'],
            leaky_relu=config['leaky_relu'])
if reload_checkpoint:
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
'''  
dims = [64, 80, 96]
channels = [16, 32, 64, 64, 128, 128, 256,256]
crnn = MobileViT_RNN((32, 128), dims, channels, num_classes = num_class, expansion=2)
#crnn.load_state_dict(torch.load('D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobileVit_chars74k_2.pth'),strict=False)
pytorch_total_params = sum(p.numel() for p in crnn.parameters())
print('total param: ', pytorch_total_params)


device: cuda
802734
total param:  7862269


In [59]:

crnn.to(device)

optimizer = optim.Adam(crnn.parameters())
criterion = CTCLoss(reduction='sum', zero_infinity=True)
criterion.to(device)
loss_history=[]
valid_history=[]
assert save_interval % valid_interval == 0
i = 1
for epoch in range(1, epochs + 1):
    print(f'epoch: {epoch}')
    tot_train_loss = 0.
    tot_train_count = 0
    for train_data in train_loader:
        loss = train_batch(crnn, train_data, optimizer, criterion, device)
        train_size = train_data[0].size(0)

        tot_train_loss += loss
        tot_train_count += train_size
        if i % show_interval == 0:
            print('train_batch_loss[', i, ']: ', loss / train_size)
            loss_history.append(loss / train_size)

        if i % valid_interval == 0:
            evaluation = evaluate(crnn, valid_loader, criterion,
                                  decode_method=config['decode_method'],
                                  beam_size=config['beam_size'])
            print('valid_evaluation: loss={loss}, acc={acc}'.format(**evaluation))
            valid_history.append(evaluation)
            if i % save_interval == 0:
                prefix = 'mobilevit_rnn_v3'
                loss = evaluation['loss']
                save_model_path = os.path.join(config['checkpoints_dir'],
                                               f'{prefix}_{i:06}_loss{loss}.pt')
                torch.save(crnn.state_dict(), save_model_path)
                print('save model at ', save_model_path)

        if i % end_interval == 0:
            break
        i += 1

    print('train_loss: ', tot_train_loss / tot_train_count)

epoch: 1
train_batch_loss[ 10 ]:  9.833599090576172
train_batch_loss[ 20 ]:  10.535752296447754
train_batch_loss[ 30 ]:  10.780181884765625
train_batch_loss[ 40 ]:  11.867664337158203
train_batch_loss[ 50 ]:  10.562088012695312
train_batch_loss[ 60 ]:  10.592382431030273
train_batch_loss[ 70 ]:  9.933206558227539
train_batch_loss[ 80 ]:  10.512938499450684
train_batch_loss[ 90 ]:  11.573403358459473
train_batch_loss[ 100 ]:  11.273148536682129
train_batch_loss[ 110 ]:  10.757099151611328
train_batch_loss[ 120 ]:  10.757181167602539
train_batch_loss[ 130 ]:  10.68504524230957
train_batch_loss[ 140 ]:  10.932250022888184
train_batch_loss[ 150 ]:  11.772577285766602
train_batch_loss[ 160 ]:  13.45754623413086
train_batch_loss[ 170 ]:  11.529508590698242
train_batch_loss[ 180 ]:  11.235370635986328
train_batch_loss[ 190 ]:  9.458175659179688
train_batch_loss[ 200 ]:  9.761530876159668
train_batch_loss[ 210 ]:  10.659767150878906
train_batch_loss[ 220 ]:  12.334430694580078
train_batch_loss

train_batch_loss[ 1850 ]:  5.899908542633057
train_batch_loss[ 1860 ]:  4.720038414001465
train_batch_loss[ 1870 ]:  5.312541961669922
train_batch_loss[ 1880 ]:  4.1861772537231445
train_batch_loss[ 1890 ]:  4.794222831726074
train_batch_loss[ 1900 ]:  3.7928197383880615
train_batch_loss[ 1910 ]:  3.4472274780273438
train_batch_loss[ 1920 ]:  4.141385555267334
train_batch_loss[ 1930 ]:  3.939920425415039
train_batch_loss[ 1940 ]:  4.447235107421875
train_batch_loss[ 1950 ]:  3.0109336376190186
train_batch_loss[ 1960 ]:  3.002383232116699
train_batch_loss[ 1970 ]:  4.424862861633301
train_batch_loss[ 1980 ]:  4.445396900177002
train_batch_loss[ 1990 ]:  5.779014587402344
train_batch_loss[ 2000 ]:  4.895319938659668
train_batch_loss[ 2010 ]:  4.481448650360107
train_batch_loss[ 2020 ]:  5.327464580535889
train_batch_loss[ 2030 ]:  5.438371658325195
train_batch_loss[ 2040 ]:  3.8320236206054688
train_batch_loss[ 2050 ]:  5.733742713928223
train_batch_loss[ 2060 ]:  4.855203628540039
train

train_batch_loss[ 3660 ]:  3.078932285308838
train_batch_loss[ 3670 ]:  2.793851852416992
train_batch_loss[ 3680 ]:  3.736936092376709
train_batch_loss[ 3690 ]:  3.4963746070861816
train_batch_loss[ 3700 ]:  3.0187976360321045
train_batch_loss[ 3710 ]:  3.6988906860351562
train_batch_loss[ 3720 ]:  3.408034324645996
train_batch_loss[ 3730 ]:  3.9663617610931396
train_batch_loss[ 3740 ]:  2.047365427017212
train_batch_loss[ 3750 ]:  3.0721211433410645
train_batch_loss[ 3760 ]:  3.6721999645233154
train_batch_loss[ 3770 ]:  3.5331454277038574
train_batch_loss[ 3780 ]:  3.788383960723877
train_batch_loss[ 3790 ]:  3.5295610427856445
train_batch_loss[ 3800 ]:  3.5023210048675537
train_batch_loss[ 3810 ]:  3.9890472888946533
train_batch_loss[ 3820 ]:  2.0893845558166504
train_batch_loss[ 3830 ]:  2.6752681732177734
train_batch_loss[ 3840 ]:  4.110867500305176
train_batch_loss[ 3850 ]:  3.214167833328247
train_batch_loss[ 3860 ]:  3.4345438480377197
train_batch_loss[ 3870 ]:  2.8713479042053

Evaluate:   2%|█                                                                     | 25/1568 [00:21<19:21,  1.33it/s]

Corrupted image for 419552


Evaluate:   4%|██▋                                                                   | 61/1568 [00:50<21:25,  1.17it/s]

Corrupted image for 467544


Evaluate:  42%|████████████████████████████▉                                        | 659/1568 [08:35<09:57,  1.52it/s]

Corrupted image for 620308


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [18:54<00:00,  1.38it/s]


valid_evaluation: loss=3.189322854866151, acc=0.16190294667972205
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v3_004000_loss3.189322854866151.pt
train_batch_loss[ 4010 ]:  3.2884435653686523
train_batch_loss[ 4020 ]:  4.41522741317749
train_batch_loss[ 4030 ]:  3.072528839111328
train_batch_loss[ 4040 ]:  4.084329605102539
train_batch_loss[ 4050 ]:  3.6947805881500244
train_batch_loss[ 4060 ]:  4.474493503570557
train_batch_loss[ 4070 ]:  3.658442974090576
train_batch_loss[ 4080 ]:  2.717303514480591
train_batch_loss[ 4090 ]:  3.140626907348633
train_batch_loss[ 4100 ]:  4.164340972900391
train_batch_loss[ 4110 ]:  3.186330556869507
train_batch_loss[ 4120 ]:  4.045413017272949
train_batch_loss[ 4130 ]:  1.8499773740768433
train_batch_loss[ 4140 ]:  3.408149242401123
train_batch_loss[ 4150 ]:  3.7791748046875
train_batch_loss[ 4160 ]:  3.3394365310668945
train_batch_loss[ 4170 ]:  3.2753944396972656
train_batch_loss[ 4180 ]:  3.06010103225708
train_batc

train_batch_loss[ 5780 ]:  2.9595065116882324
train_batch_loss[ 5790 ]:  2.3235199451446533
train_batch_loss[ 5800 ]:  2.905834197998047
train_batch_loss[ 5810 ]:  3.3923392295837402
train_batch_loss[ 5820 ]:  2.083649158477783
train_batch_loss[ 5830 ]:  2.663877010345459
train_batch_loss[ 5840 ]:  2.909839153289795
train_batch_loss[ 5850 ]:  2.6254982948303223
train_batch_loss[ 5860 ]:  3.247581958770752
train_batch_loss[ 5870 ]:  2.8613743782043457
train_batch_loss[ 5880 ]:  1.8319153785705566
train_batch_loss[ 5890 ]:  2.5245935916900635
train_batch_loss[ 5900 ]:  1.7381675243377686
train_batch_loss[ 5910 ]:  3.04557466506958
train_batch_loss[ 5920 ]:  3.354848861694336
train_batch_loss[ 5930 ]:  3.5413990020751953
train_batch_loss[ 5940 ]:  2.793388605117798
train_batch_loss[ 5950 ]:  2.173583507537842
train_batch_loss[ 5960 ]:  2.548964500427246
train_batch_loss[ 5970 ]:  2.0771543979644775
train_batch_loss[ 5980 ]:  2.904280424118042
train_batch_loss[ 5990 ]:  2.534499406814575
t

train_batch_loss[ 7580 ]:  3.233560800552368
train_batch_loss[ 7590 ]:  2.8325657844543457
train_batch_loss[ 7600 ]:  2.79740309715271
train_batch_loss[ 7610 ]:  3.191277265548706
train_batch_loss[ 7620 ]:  2.5764942169189453
train_batch_loss[ 7630 ]:  3.847545623779297
train_batch_loss[ 7640 ]:  3.0700390338897705
train_batch_loss[ 7650 ]:  1.2632285356521606
train_batch_loss[ 7660 ]:  2.6056008338928223
train_batch_loss[ 7670 ]:  2.1541383266448975
train_batch_loss[ 7680 ]:  3.878678321838379
train_batch_loss[ 7690 ]:  2.5896830558776855
train_batch_loss[ 7700 ]:  3.3199825286865234
train_batch_loss[ 7710 ]:  1.9049208164215088
train_batch_loss[ 7720 ]:  1.9264169931411743
train_batch_loss[ 7730 ]:  3.7657344341278076
train_batch_loss[ 7740 ]:  2.624603509902954
train_batch_loss[ 7750 ]:  1.6153075695037842
train_batch_loss[ 7760 ]:  2.3777523040771484
train_batch_loss[ 7770 ]:  3.445253610610962
train_batch_loss[ 7780 ]:  2.7481207847595215
train_batch_loss[ 7790 ]:  3.2768001556396

Evaluate:  17%|███████████▌                                                         | 263/1568 [02:42<14:07,  1.54it/s]

Corrupted image for 620308


Evaluate:  24%|████████████████▍                                                    | 374/1568 [03:54<12:59,  1.53it/s]

Corrupted image for 467544


Evaluate:  40%|███████████████████████████▋                                         | 629/1568 [06:35<09:35,  1.63it/s]

Corrupted image for 419552


Evaluate: 100%|████████████████████████████████████████████████████████████████████| 1568/1568 [16:17<00:00,  1.60it/s]


valid_evaluation: loss=2.6722909843792433, acc=0.2055089232547768
save model at  D:/lab2/深度学习课程设计/crnn-pytorch-master/checkpoints/mobilevit_rnn_v3_008000_loss2.6722909843792433.pt
train_batch_loss[ 8010 ]:  2.2248923778533936
train_batch_loss[ 8020 ]:  3.2494008541107178
train_batch_loss[ 8030 ]:  2.0741119384765625
train_batch_loss[ 8040 ]:  2.664968252182007


KeyboardInterrupt: 

In [None]:
from torchvision import models
model = models.resnet50(pretrained=False)

In [None]:
import timm

# from timm
pretrained_model_name = "resnet50"
model = timm.create_model(pretrained_model_name, pretrained=False)

In [26]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms

vgg16 = models.vgg16(weights=False)
vgg16

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
import torch.nn as nn


class CRNN(nn.Module):

    def __init__(self, img_channel, img_height, img_width, num_class,
                 map_to_seq_hidden=64, rnn_hidden=256, leaky_relu=False):
        super(CRNN, self).__init__()

        self.cnn, (output_channel, output_height, output_width) = \
            self._cnn_backbone(img_channel, img_height, img_width, leaky_relu)

        self.map_to_seq = nn.Linear(output_channel * output_height, map_to_seq_hidden)

        self.rnn1 = nn.LSTM(map_to_seq_hidden, rnn_hidden, bidirectional=True)
        self.rnn2 = nn.LSTM(2 * rnn_hidden, rnn_hidden, bidirectional=True)

        self.dense = nn.Linear(2 * rnn_hidden, num_class)

    def _cnn_backbone(self, img_channel, img_height, img_width, leaky_relu):
        assert img_height % 16 == 0
        assert img_width % 4 == 0

        channels = [img_channel, 64, 128, 256, 256, 512, 512, 512]
        kernel_sizes = [3, 3, 3, 3, 3, 3, 2]
        strides = [1, 1, 1, 1, 1, 1, 1]
        paddings = [1, 1, 1, 1, 1, 1, 0]

        cnn = nn.Sequential()

        def conv_relu(i, batch_norm=False):
            # shape of input: (batch, input_channel, height, width)
            input_channel = channels[i]
            output_channel = channels[i+1]

            cnn.add_module(
                f'conv{i}',
                nn.Conv2d(input_channel, output_channel, kernel_sizes[i], strides[i], paddings[i])
            )

            if batch_norm:
                cnn.add_module(f'batchnorm{i}', nn.BatchNorm2d(output_channel))

            relu = nn.LeakyReLU(0.2, inplace=True) if leaky_relu else nn.ReLU(inplace=True)
            cnn.add_module(f'relu{i}', relu)

        # size of image: (channel, height, width) = (img_channel, img_height, img_width)
        conv_relu(0)
        cnn.add_module('pooling0', nn.MaxPool2d(kernel_size=2, stride=2))
        # (64, img_height // 2, img_width // 2)

        conv_relu(1)
        cnn.add_module('pooling1', nn.MaxPool2d(kernel_size=2, stride=2))
        # (128, img_height // 4, img_width // 4)

        conv_relu(2)
        conv_relu(3)
        cnn.add_module(
            'pooling2',
            nn.MaxPool2d(kernel_size=(2, 1))
        )  # (256, img_height // 8, img_width // 4)

        conv_relu(4, batch_norm=True)
        conv_relu(5, batch_norm=True)
        cnn.add_module(
            'pooling3',
            nn.MaxPool2d(kernel_size=(2, 1))
        )  # (512, img_height // 16, img_width // 4)

        conv_relu(6)  # (512, img_height // 16 - 1, img_width // 4 - 1)

        output_channel, output_height, output_width = \
            channels[-1], img_height // 16 - 1, img_width // 4 - 1
        return cnn, (output_channel, output_height, output_width)

    def forward(self, images):
        # shape of images: (batch, channel, height, width)

        conv = self.cnn(images)
        batch, channel, height, width = conv.size()

        conv = conv.view(batch, channel * height, width)
        conv = conv.permute(2, 0, 1)  # (width, batch, feature)
        seq = self.map_to_seq(conv)

        recurrent, _ = self.rnn1(seq)
        recurrent, _ = self.rnn2(recurrent)

        output = self.dense(recurrent)
        return output  # shape: (seq_len, batch, num_class)

In [25]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms



# Freeze the parameters of the pre-trained layers


class VGG_RNN(nn.Module):

    def __init__(self, img_channel, img_height, img_width, num_class,
                 map_to_seq_hidden=64, rnn_hidden=256, leaky_relu=False):
        super(VGG_RNN, self).__init__()
        vgg16 = models.vgg16(weights=False)
        
        
        vgg16.features[0] = nn.Conv2d(img_channel, 64, kernel_size=3, stride=1, padding=1)
        ''' for param in vgg16.features.parameters():
            param.requires_grad = False'''
        self.cnn  = vgg16.features
            
        
        output_channel, output_height, output_width = self._compute_output_shape(img_channel, img_height, img_width)
        
        self.map_to_seq = nn.Linear(output_channel * output_height, map_to_seq_hidden)

        self.rnn1 = nn.LSTM(map_to_seq_hidden, rnn_hidden, bidirectional=True)
        self.rnn2 = nn.LSTM(2 * rnn_hidden, rnn_hidden, bidirectional=True)

        self.dense = nn.Linear(2 * rnn_hidden, num_class)

    def _compute_output_shape(self,img_channel,  img_height, img_width):
        dummy_input = torch.randn(1, img_channel, img_height, img_width)
        dummy_output = self.cnn(dummy_input)
        _, channels, height, width = dummy_output.size()
        return channels, height, width

    def forward(self, images):
        # shape of images: (batch, channel, height, width)

        conv = self.cnn(images)
        batch, channel, height, width = conv.size()
        print('batch, channel, height, width' , batch, channel, height, width)
        conv = conv.view(batch, channel * height, width)
        conv = conv.permute(2, 0, 1)  # (width, batch, feature)
        seq = self.map_to_seq(conv)

        recurrent, _ = self.rnn1(seq)
        recurrent, _ = self.rnn2(recurrent)

        output = self.dense(recurrent)
        return output  # shape: (seq_len, batch, num_class)


In [26]:
def train_batch(crnn, data, optimizer, criterion, device):
    crnn.train()
    images, targets, target_lengths = [d.to(device) for d in data]
    print(images.size())
    logits = crnn(images)
    print(logits.size())
    log_probs = torch.nn.functional.log_softmax(logits, dim=2)

    batch_size = images.size(0)
    input_lengths = torch.LongTensor([logits.size(0)] * batch_size)
    target_lengths = torch.flatten(target_lengths)
    #print(batch_size)
    #print(input_lengths.size())
    #print(target_lengths.size())
    #print('end')
    loss = criterion(log_probs, targets, input_lengths, target_lengths)

    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(crnn.parameters(), 5) # gradient clipping with 5
    optimizer.step()
    return loss.item()



In [29]:


epochs = config['epochs']
train_batch_size = config['train_batch_size']
eval_batch_size = config['eval_batch_size']
lr = config['lr']
show_interval = config['show_interval']
valid_interval = config['valid_interval']
save_interval = config['save_interval']
cpu_workers = config['cpu_workers']
reload_checkpoint = config['reload_checkpoint']
valid_max_iter = config['valid_max_iter']

img_width = config['img_width']
img_height = config['img_height']
data_dir = config['data_dir']

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

train_dataset = Synth90kDataset(root_dir=data_dir, mode='train',
                                img_height=img_height, img_width=img_width)
valid_dataset = Synth90kDataset(root_dir=data_dir, mode='dev',
                                img_height=img_height, img_width=img_width)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=eval_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)

num_class = len(Synth90kDataset.LABEL2CHAR) + 1
crnn = VGG_RNN(1, img_height, img_width, num_class,
            map_to_seq_hidden=config['map_to_seq_hidden'],
            rnn_hidden=config['rnn_hidden'],
            leaky_relu=config['leaky_relu'])
if reload_checkpoint:
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
crnn.to(device)

optimizer = optim.Adam(crnn.parameters())
criterion = CTCLoss(reduction='sum', zero_infinity=True)
criterion.to(device)

assert save_interval % valid_interval == 0
i = 1
for epoch in range(1, epochs + 1):
    print(f'epoch: {epoch}')
    tot_train_loss = 0.
    tot_train_count = 0
    for train_data in train_loader:
        loss = train_batch(crnn, train_data, optimizer, criterion, device)
        train_size = train_data[0].size(0)

        tot_train_loss += loss
        tot_train_count += train_size
        if i % show_interval == 0:
            print('train_batch_loss[', i, ']: ', loss / train_size)

        if i % valid_interval == 0:
            evaluation = evaluate(crnn, valid_loader, criterion,
                                  decode_method=config['decode_method'],
                                  beam_size=config['beam_size'])
            print('valid_evaluation: loss={loss}, acc={acc}'.format(**evaluation))

            if i % save_interval == 0:
                prefix = 'crnn'
                loss = evaluation['loss']
                save_model_path = os.path.join(config['checkpoints_dir'],
                                               f'{prefix}_{i:06}_loss{loss}.pt')
                torch.save(crnn.state_dict(), save_model_path)
                print('save model at ', save_model_path)

        i += 1

    print('train_loss: ', tot_train_loss / tot_train_count)



device: cuda
epoch: 1
image size
torch.Size([64, 1, 64, 64])
batch, channel, height, width 64 512 2 2
batch logits log_probs targets input_len target_len
64
torch.Size([2, 64, 37])
torch.Size([2, 64, 37])
torch.Size([539])
torch.Size([64])
torch.Size([64])
end
image size
torch.Size([64, 1, 64, 64])
batch, channel, height, width 64 512 2 2
batch logits log_probs targets input_len target_len
64
torch.Size([2, 64, 37])
torch.Size([2, 64, 37])
torch.Size([533])
torch.Size([64])
torch.Size([64])
end
image size
torch.Size([64, 1, 64, 64])
batch, channel, height, width 64 512 2 2
batch logits log_probs targets input_len target_len
64
torch.Size([2, 64, 37])
torch.Size([2, 64, 37])
torch.Size([509])
torch.Size([64])
torch.Size([64])
end
image size
torch.Size([64, 1, 64, 64])
batch, channel, height, width 64 512 2 2
batch logits log_probs targets input_len target_len
64
torch.Size([2, 64, 37])
torch.Size([2, 64, 37])
torch.Size([533])
torch.Size([64])
torch.Size([64])
end
image size
torch.Size(

KeyboardInterrupt: 

In [11]:


epochs = config['epochs']
train_batch_size = config['train_batch_size']
eval_batch_size = config['eval_batch_size']
lr = config['lr']
show_interval = config['show_interval']
valid_interval = config['valid_interval']
save_interval = config['save_interval']
cpu_workers = config['cpu_workers']
reload_checkpoint = config['reload_checkpoint']
valid_max_iter = config['valid_max_iter']

img_width = config['img_width']
img_height = config['img_height']
data_dir = config['data_dir']

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

train_dataset = Synth90kDataset(root_dir=data_dir, mode='train',
                                img_height=img_height, img_width=img_width)
valid_dataset = Synth90kDataset(root_dir=data_dir, mode='dev',
                                img_height=img_height, img_width=img_width)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=train_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=eval_batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=synth90k_collate_fn)

num_class = len(Synth90kDataset.LABEL2CHAR) + 1
crnn = VGG_RNN(1, img_height, img_width, num_class,
            map_to_seq_hidden=config['map_to_seq_hidden'],
            rnn_hidden=config['rnn_hidden'],
            leaky_relu=config['leaky_relu'])
if reload_checkpoint:
    crnn.load_state_dict(torch.load(reload_checkpoint, map_location=device))
crnn.to(device)

optimizer = optim.RMSprop(crnn.parameters(), lr=lr)
criterion = CTCLoss(reduction='sum', zero_infinity=True)
criterion.to(device)

assert save_interval % valid_interval == 0
history = []

i = 1
for epoch in range(1, epochs + 1):
    print(f'epoch: {epoch}')
    tot_train_loss = 0.
    tot_train_count = 0
    for train_data in train_loader:
        loss = train_batch(crnn, train_data, optimizer, criterion, device)
        train_size = train_data[0].size(0)

        tot_train_loss += loss
        tot_train_count += train_size
        if i % show_interval == 0:
            print('train_batch_loss[', i, ']: ', loss / train_size)
            history.append(loss / train_size)

        if i % valid_interval == 0:
            evaluation = evaluate(crnn, valid_loader, criterion,
                                  decode_method=config['decode_method'],
                                  beam_size=config['beam_size'])
            print('valid_evaluation: loss={loss}, acc={acc}'.format(**evaluation))

            if i % save_interval == 0:
                prefix = 'vgg-rnn'
                loss = evaluation['loss']
                save_model_path = os.path.join(config['checkpoints_dir'],
                                               f'{prefix}_{i:06}_loss{loss}.pt')
                torch.save(crnn.state_dict(), save_model_path)
                print('save model at ', save_model_path)

        i += 1

    print('train_loss: ', tot_train_loss / tot_train_count)



device: cuda
epoch: 1
train_batch_loss[ 10 ]:  0.19078294932842255
train_batch_loss[ 20 ]:  0.23420129716396332
train_batch_loss[ 30 ]:  0.3162612318992615
train_batch_loss[ 40 ]:  0.3073037564754486
train_batch_loss[ 50 ]:  0.22981135547161102
train_batch_loss[ 60 ]:  0.19790568947792053
train_batch_loss[ 70 ]:  0.28985390067100525
train_batch_loss[ 80 ]:  0.3370938003063202
train_batch_loss[ 90 ]:  0.17985296249389648
train_batch_loss[ 100 ]:  0.28941500186920166
train_batch_loss[ 110 ]:  0.17580735683441162
train_batch_loss[ 120 ]:  0.10407394170761108
train_batch_loss[ 130 ]:  0.10388155281543732
train_batch_loss[ 140 ]:  0.2338990867137909
train_batch_loss[ 150 ]:  0.1459864377975464
train_batch_loss[ 160 ]:  0.1163751482963562
train_batch_loss[ 170 ]:  0.045228131115436554
train_batch_loss[ 180 ]:  0.3173154294490814


KeyboardInterrupt: 

In [32]:
import os
import glob

import torch
from torch.utils.data import Dataset
from scipy import signal
from scipy.io import wavfile
import cv2
from PIL import Image
import numpy as np


class Synth90kDataset(Dataset):
    CHARS = '0123456789abcdefghijklmnopqrstuvwxyz'
    CHAR2LABEL = {char: i + 1 for i, char in enumerate(CHARS)}
    LABEL2CHAR = {label: char for char, label in CHAR2LABEL.items()}

    def __init__(self, root_dir=None, mode=None, paths=None, img_height=32, img_width=100,img_channel=3):
        if root_dir and mode and not paths:
            paths, texts = self._load_from_raw_files(root_dir, mode)
        elif not root_dir and not mode and paths:
            texts = None

        self.paths = paths
        self.texts = texts
        self.img_height = img_height
        self.img_width = img_width
        self.img_channel = img_channel

    def _load_from_raw_files(self, root_dir, mode):
        mapping = {}
        with open(os.path.join(root_dir, 'lexicon.txt'), 'r') as fr:
            for i, line in enumerate(fr.readlines()):
                mapping[i] = line.strip()

        paths_file = None
        if mode == 'train':
            paths_file = 'annotation_train.txt'
        elif mode == 'dev':
            paths_file = 'annotation_val.txt'
        elif mode == 'test':
            paths_file = 'annotation_test.txt'

        paths = []
        texts = []
        with open(os.path.join(root_dir, paths_file), 'r') as fr:
            for line in fr.readlines():
                path, index_str = line.strip().split(' ')
                path = os.path.join(root_dir, path)
                index = int(index_str)
                text = mapping[index]
                paths.append(path)
                texts.append(text)
        return paths, texts

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        path = self.paths[index]

        try:
            image = Image.open(path).convert('L')  # grey-scale
        except IOError:
            print('Corrupted image for %d' % index)
            return self[index + 1]
        image = image.convert(mode='RGB')
        
        image = image.resize((self.img_width, self.img_height), resample=Image.BILINEAR)
        image = np.array(image)
        
        image = image.reshape((self.img_channel,self.img_height, self.img_width ))
        image = (image / 127.5) - 1.0

        image = torch.FloatTensor(image)
        #print(image.shape)
        if self.texts:
            text = self.texts[index]
            target = [self.CHAR2LABEL[c] for c in text]
            target_length = [len(target)]

            target = torch.LongTensor(target)
            target_length = torch.LongTensor(target_length)
            return image, target, target_length
        else:
            return image


def synth90k_collate_fn(batch):
    images, targets, target_lengths = zip(*batch)
    images = torch.stack(images, 0)
    targets = torch.cat(targets, 0)
    target_lengths = torch.cat(target_lengths, 0)
    return images, targets, target_lengths


In [36]:
def train_batch(crnn, data, optimizer, criterion, device):
    crnn.train()
    images, targets, target_lengths = [d.to(device) for d in data]
    #print(images.size())
    logits = crnn(images)
    #print(logits.size())
    log_probs = torch.nn.functional.log_softmax(logits,dim=2)

    batch_size = images.size(0)
    input_lengths = torch.LongTensor([logits.size(0)] * batch_size)
    target_lengths = torch.flatten(target_lengths)

    loss = criterion(log_probs, targets, input_lengths, target_lengths)

    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(crnn.parameters(), 5) # gradient clipping with 5
    optimizer.step()
    return loss.item()



In [8]:
from einops import rearrange

def conv_1x1_bn(inp, oup):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.SiLU()
    )


def conv_nxn_bn(inp, oup, kernal_size=3, stride=1):
    return nn.Sequential(
        nn.Conv2d(inp, oup, kernal_size, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.SiLU()
    )


class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn
    
    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)


class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout=0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.SiLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )
    
    def forward(self, x):
        return self.net(x)


class Attention(nn.Module):
    def __init__(self, dim, heads=8, dim_head=64, dropout=0.):
        super().__init__()
        inner_dim = dim_head *  heads
        project_out = not (heads == 1 and dim_head == dim)

        self.heads = heads
        self.scale = dim_head ** -0.5

        self.attend = nn.Softmax(dim = -1)
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)

        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        ) if project_out else nn.Identity()

    def forward(self, x):
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = map(lambda t: rearrange(t, 'b p n (h d) -> b p h n d', h = self.heads), qkv)

        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        attn = self.attend(dots)
        out = torch.matmul(attn, v)
        out = rearrange(out, 'b p h n d -> b p n (h d)')
        return self.to_out(out)


class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, Attention(dim, heads, dim_head, dropout)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout))
            ]))
    
    def forward(self, x):
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return x


class MV2Block(nn.Module):
    def __init__(self, inp, oup, stride=1, expansion=4):
        super().__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(inp * expansion)
        self.use_res_connect = self.stride == 1 and inp == oup

        if expansion == 1:
            self.conv = nn.Sequential(
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.SiLU(),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )
        else:
            self.conv = nn.Sequential(
                # pw
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.SiLU(),
                # dw
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.SiLU(),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            )

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileViTBlock(nn.Module):
    def __init__(self, dim, depth, channel, kernel_size, patch_size, mlp_dim, dropout=0.):
        super().__init__()
        self.ph, self.pw = patch_size

        self.conv1 = conv_nxn_bn(channel, channel, kernel_size)
        self.conv2 = conv_1x1_bn(channel, dim)

        self.transformer = Transformer(dim, depth, 4, 8, mlp_dim, dropout)

        self.conv3 = conv_1x1_bn(dim, channel)
        self.conv4 = conv_nxn_bn(2 * channel, channel, kernel_size)
    
    def forward(self, x):
        y = x.clone()

        # Local representations
        x = self.conv1(x)
        x = self.conv2(x)
        
        # Global representations
        _, _, h, w = x.shape
        x = rearrange(x, 'b d (h ph) (w pw) -> b (ph pw) (h w) d', ph=self.ph, pw=self.pw)
        x = self.transformer(x)
        x = rearrange(x, 'b (ph pw) (h w) d -> b d (h ph) (w pw)', h=h//self.ph, w=w//self.pw, ph=self.ph, pw=self.pw)

        # Fusion
        x = self.conv3(x)
        x = torch.cat((x, y), 1)
        x = self.conv4(x)
        return x

In [5]:
class MobileVit_improv(nn.Module):
    def __init__(self, image_size, dims, channels, num_classes, expansion=4, kernel_size=3, patch_size=(2, 2)):
        super().__init__()
        ih, iw = image_size
        ph, pw = patch_size
        assert ih % ph == 0 and iw % pw == 0

        L = [2, 4, 3]
        rnn_hidden = 256
        self.conv1 = conv_nxn_bn(1, channels[0], stride=2)

        self.mv2 = nn.ModuleList([])
        self.mv2.append(MV2Block(channels[0], channels[1], 1, expansion))
        self.mv2.append(MV2Block(channels[1], channels[2], 1, expansion))
        self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion))
        self.mv2.append(MV2Block(channels[2], channels[3], 1, expansion))   # Repeat
        self.mv2.append(MV2Block(channels[3], channels[4], 1, expansion))
        self.mv2.append(MV2Block(channels[5], channels[6], 2, expansion))
        self.mv2.append(MV2Block(channels[7], channels[8], 2, expansion))
        
        self.mvit = nn.ModuleList([])
        self.mvit.append(MobileViTBlock(dims[0], L[0], channels[5], kernel_size, patch_size, int(dims[0]*2)))
        self.mvit.append(MobileViTBlock(dims[1], L[1], channels[7], kernel_size, patch_size, int(dims[1]*4)))
        self.mvit.append(MobileViTBlock(dims[2], L[2], channels[9], kernel_size, patch_size, int(dims[2]*4)))

        self.conv2 = conv_1x1_bn(channels[-2], channels[-1])

        self.pool = nn.AvgPool2d(ih//32, 1)
        self.fc = nn.Linear(channels[-1], num_classes, bias=False)
        
        map_to_seq_hidden = 1000
        #self.model.head.fc = nn.Linear(self.model.head.in_features, map_to_seq)
        output_channel, output_height, output_width = self._compute_output_shape(1, image_size[1], image_size[0])
        
        self.map_to_seq = nn.Linear(output_channel * output_height, map_to_seq_hidden)
        self.rnn1 = nn.LSTM(map_to_seq_hidden, rnn_hidden, bidirectional=True)
        self.rnn2 = nn.LSTM(2 * rnn_hidden, rnn_hidden, bidirectional=True)

        self.dense = nn.Linear(2 * rnn_hidden, num_class)
        
    def backbone_forward(self,x):
        x = self.conv1(x)
        x = self.mv2[0](x)

        x = self.mv2[1](x)
        x = self.mv2[2](x)
        x = self.mv2[3](x)      # Repeat

        x = self.mv2[4](x)
        x = self.mvit[0](x)

        x = self.mv2[5](x)
        x = self.mvit[1](x)

        x = self.mv2[6](x)
        x = self.mvit[2](x)
        x = self.conv2(x)
        return x

    def forward(self, x):
        
        #x = self.pool(x).view(-1, x.shape[1])
        x = self.backbone_forward(x)
        batch, channel, height, width = x.size()
        print('batch, channel, height, width')
        print(batch, channel, height, width)
        x = x.view(batch, channel * height, width)
        x = x.permute(2, 0, 1)  # (width, batch, feature)
        #print(x.size())
        seq = self.map_to_seq(x)

        recurrent, _ = self.rnn1(seq)
        recurrent, _ = self.rnn2(recurrent)

        output = self.dense(recurrent)
        #x = self.fc(x)
        return output
    
    
    def _compute_output_shape(self,img_channels,  img_width, img_height):
        print(img_channels,  img_height, img_width)
        dummy_input = torch.randn(1, img_channels, img_width, img_height)
        dummy_output = self.backbone_forward(dummy_input)
        print(dummy_output.size())
        
        _, channels, width, height = dummy_output.size()
        
        return channels, height, width