定长CNN模型

In [3]:
from tqdm import tqdm
from  PIL import Image
import cv2
import  albumentations as al
import torchvision.transforms as transforms
# Tensor conversion followed by per-channel normalisation. The mean/std
# values are the ones conventionally used with torchvision's
# ImageNet-pretrained backbones.
trans = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
def al_trans(img):
    """Augment an image with albumentations and return the normalised tensor.

    The image is resized to 64x192, pushed through a randomised chain of
    noise, blur, geometric, brightness/contrast, distortion and sharpening
    transforms, converted with ``cv2.COLOR_BGR2RGB``, wrapped in a PIL image
    and finally passed through the module-level ``trans`` pipeline.

    Args:
        img: Image array accepted by albumentations (presumably an HxWx3
            BGR array as produced by ``cv2.imread`` -- confirm with caller).

    Returns:
        The output of ``trans`` for the augmented image.
    """
    noise = al.OneOf(
        [
            al.IAAAdditiveGaussianNoise(),
            al.GaussNoise(),
        ],
        p=0.6,
    )
    blur = al.OneOf(
        [
            al.MotionBlur(p=.2),
            al.MedianBlur(blur_limit=3, p=.1),
            al.Blur(blur_limit=3, p=.1),
        ],
        p=0.2,
    )
    distortion = al.OneOf(
        [
            al.OpticalDistortion(p=0.3),
            al.GridDistortion(p=.1),
            al.IAAPiecewiseAffine(p=0.3),
        ],
        p=0.2,
    )
    sharpen = al.OneOf(
        [
            al.CLAHE(clip_limit=2),
            al.IAASharpen(),
            al.IAAEmboss(),
        ],
        p=0.3,
    )
    pipeline = al.Compose(
        [
            al.Resize(height=64, width=192),
            noise,
            blur,
            al.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=.2),
            al.RandomBrightnessContrast(p=.3),
            distortion,
            sharpen,
        ]
    )

    augmented = pipeline(image=img)['image']
    # Swap the channel order before handing the array to PIL.
    rgb = cv2.cvtColor(augmented, cv2.COLOR_BGR2RGB)
    return trans(Image.fromarray(rgb))

ModuleNotFoundError: No module named 'cv2'

In [None]:
class ModelSet(nn.Module):
    """CNN backbone followed by five parallel 11-way linear heads.

    Supported backbones are the torchvision ResNets listed in
    ``_RESNET_VARIANTS``, ``EfficientNet.from_pretrained`` names of the form
    ``'efficientnet-bN'`` and ``'Xception'``.

    Spatial pyramid pooling (SPP) is only wired into the EfficientNet path
    (the original author noted this feels questionable and needs further
    investigation). For every other backbone ``use_spp`` has no effect, and
    the heads are sized for the globally average-pooled features.

    Args:
        model_cnn: Name of the backbone to build.
        use_spp: If truthy and the backbone is an EfficientNet, pool the
            feature map with ``SPPLayer`` instead of global average pooling.

    Raises:
        ValueError: If ``model_cnn`` does not name a supported backbone.
    """

    # ResNet variants that ``cnn_resnet`` can build from torchvision.
    _RESNET_VARIANTS = ('resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152')

    def __init__(self, model_cnn='resnet18', use_spp=False):
        super(ModelSet, self).__init__()
        self.spp = SPPLayer(3)
        self.use_spp = use_spp
        self.model_cnn = model_cnn

        if self.model_cnn[:6] == 'resnet':
            model_conv = self.cnn_resnet()
            # The final child is the classifier; its input width is the
            # feature width produced by the truncated backbone.
            in_feature = list(model_conv.children())[-1].in_features
            model_conv.avgpool = nn.AdaptiveAvgPool2d(1)
            model_conv = nn.Sequential(*list(model_conv.children())[:-1])
        elif self._is_efficientnet():
            model_conv = EfficientNet.from_pretrained(self.model_cnn)
            self.avgpool = nn.AdaptiveAvgPool2d(1)
            in_feature = list(model_conv.children())[-5].num_features
        elif self.model_cnn == 'Xception':
            model_conv = XceptionNet.xception()
            model_conv = nn.Sequential(*list(model_conv.children())[:-1])
            model_conv.add_module('AdaptiveAvgPool2d', nn.AdaptiveAvgPool2d(1))
            in_feature = list(model_conv.children())[-2].num_features
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on ``model_conv``.
            raise ValueError('Unsupported backbone: %r' % (self.model_cnn,))

        # Kept for backward compatibility; ``forward`` uses ``self.spp``.
        self.spp_layer = SPPLayer(num_levels=3)
        self.cnn = model_conv

        # Only widen the heads when ``forward`` really applies SPP; otherwise
        # the Linear layers would not match the feature width.
        if self.use_spp and self._is_efficientnet():
            in_feature = calc_auto(3, in_feature * 4)

        self.fc1 = nn.Linear(in_feature, 11)
        self.fc2 = nn.Linear(in_feature, 11)
        self.fc3 = nn.Linear(in_feature, 11)
        self.fc4 = nn.Linear(in_feature, 11)
        self.fc5 = nn.Linear(in_feature, 11)

    def _is_efficientnet(self):
        """Return True for names of the form ``'efficientnet-bN'``."""
        return self.model_cnn[:-3] == 'efficientnet'

    def forward(self, img):
        """Run the backbone and return the five head outputs as a tuple.

        Args:
            img: Input batch passed straight to the backbone.

        Returns:
            ``(c1, c2, c3, c4, c5)``, one output per linear head.
        """
        if self._is_efficientnet():
            feat = self.cnn.extract_features(img)
            if self.use_spp:
                feat = self.spp(feat)
            else:
                feat = self.avgpool(feat)
        else:
            feat = self.cnn(img)

        # Flatten everything except the batch dimension.
        feat = feat.view(feat.shape[0], -1)
        heads = (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5)
        return tuple(head(feat) for head in heads)

    def cnn_resnet(self):
        """Build the pretrained torchvision ResNet named by ``self.model_cnn``.

        Raises:
            ValueError: If the name is not one of ``_RESNET_VARIANTS``
                (previously this silently returned ``None``).
        """
        if self.model_cnn not in self._RESNET_VARIANTS:
            raise ValueError('Unsupported ResNet variant: %r' % (self.model_cnn,))
        return getattr(models, self.model_cnn)(pretrained=True)


if __name__ == '__main__':
    # Smoke test: push a random batch of two 3x32x32 inputs through an
    # SPP-enabled EfficientNet-b0 model and print the first head's shape.
    model = ModelSet(use_spp=True, model_cnn='efficientnet-b0')
    x = torch.rand(2, 3, 32, 32)
    r = model(x)
    print(r[0].shape)

In [None]:
class SPPLayer(torch.nn.Module):
    """Spatial pyramid pooling over a 4-D ``(num, c, h, w)`` tensor.

    For pyramid level ``i`` (0-based) the input is zero-padded, pooled with a
    kernel/stride derived from ``2 ** (i + 1)`` bins per side, flattened per
    sample, and the results of all levels are concatenated along dim 1.

    Args:
        num_levels: Number of pyramid levels (must be at least 1).
        pool_type: ``'max_pool'`` or ``'avg_pool'``.

    Raises:
        ValueError: If ``num_levels`` is smaller than 1 or ``pool_type`` is
            not one of the supported names.
    """

    _POOL_TYPES = ('max_pool', 'avg_pool')

    def __init__(self, num_levels, pool_type='max_pool'):
        super(SPPLayer, self).__init__()
        if num_levels < 1:
            raise ValueError('num_levels must be >= 1, got %r' % (num_levels,))
        self.num_levels = num_levels
        self.pool_type = pool_type
        # Fail at construction time rather than deep inside forward().
        self._pool_fn()

    def _pool_fn(self):
        """Return the functional pooling op selected by ``pool_type``."""
        if self.pool_type == 'max_pool':
            return F.max_pool2d
        if self.pool_type == 'avg_pool':
            return F.avg_pool2d
        raise ValueError(
            'pool_type must be one of %r, got %r' % (self._POOL_TYPES, self.pool_type)
        )

    def forward(self, x):
        """Pool ``x`` at every pyramid level and concatenate the results.

        Args:
            x: Tensor of shape ``(num, c, h, w)``.

        Returns:
            Tensor of shape ``(num, total)`` holding the flattened pooled
            features of all levels, in level order.
        """
        num, _, h, w = x.size()
        pool = self._pool_fn()
        pooled = []
        for i in range(self.num_levels):
            level = 2 << i  # 2, 4, 8, ... bins per side
            kernel_size = (math.ceil(h / level), math.ceil(w / level))
            # Symmetric zero padding so the padded size covers level * kernel.
            pad_h = math.floor((kernel_size[0] * level - h + 1) / 2)
            pad_w = math.floor((kernel_size[1] * level - w + 1) / 2)
            x_new = torch.nn.ZeroPad2d((pad_w, pad_w, pad_h, pad_h))(x)

            # Kernel and stride are recomputed from the padded size.
            h_new, w_new = x_new.size()[2:]
            kernel_size = (math.ceil(h_new / level), math.ceil(w_new / level))
            stride = (math.floor(h_new / level), math.floor(w_new / level))
            pooled.append(
                pool(x_new, kernel_size=kernel_size, stride=stride).view(num, -1)
            )

        return torch.cat(pooled, 1)

In [None]:
# Adam with weight decay 1e-5; StepLR multiplies the learning rate by 0.5
# every 20 scheduler steps.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=lr,
    weight_decay=1e-5,
)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=20,
    gamma=0.5,
)

In [None]:
# Strategy 1: data1[i] != data2[i] and data2[i] == data3[i] == data4[i]
#
# Strategy 2: data1[i] != data2[i] and (data2[i] == data3[i] or data2[i] == data4[i])
#
# Pairwise merge: wherever the two result files disagree and the second
# file's code is longer, adopt the second file's code. The count of replaced
# rows is printed at the end.
data1 = pd.read_csv(r'D:\r1.csv')
# The original loaded r2.csv into data1 again, leaving data2 undefined.
data2 = pd.read_csv(r'D:\r2.csv')
replaced = 0  # renamed from `sum` so the builtin is not shadowed
for i in range(len(data1)):
    code1 = str(data1.loc[i, 'file_code'])
    code2 = str(data2.loc[i, 'file_code'])
    # The original compared len(data1 code) with itself, which is never true.
    if code1 != code2 and len(code1) < len(code2):
        print(data1.loc[i, 'file_code'], data2.loc[i, 'file_code'])
        data1.loc[i, 'file_code'] = data2.loc[i, 'file_code']
        replaced += 1

print(replaced)

# Voting merge: replace data1's code when it disagrees with data2 while
# data2, data3 and data4 all agree with each other.
#
# NOTE(review): the original cell loaded r1.csv into data1 and immediately
# overwrote it with r2.csv. Only the effective bindings are kept here
# (data1..data4 <- r2..r5); confirm whether r1.csv was meant to take part.
data1 = pd.read_csv(r'D:\r2.csv')
data2 = pd.read_csv(r'D:\r3.csv')
data3 = pd.read_csv(r'D:\r4.csv')
data4 = pd.read_csv(r'D:\r5.csv')
replaced = 0  # renamed from `sum` so the builtin is not shadowed
for i in range(len(data1)):
    code1 = str(data1.loc[i, 'file_code'])
    code2 = str(data2.loc[i, 'file_code'])
    code3 = str(data3.loc[i, 'file_code'])
    code4 = str(data4.loc[i, 'file_code'])
    # `&&` is not Python; the boolean operator is `and`.
    if code1 != code2 and code2 == code3 == code4:
        print(data1.loc[i, 'file_code'], data2.loc[i, 'file_code'])
        data1.loc[i, 'file_code'] = data2.loc[i, 'file_code']
        replaced += 1

print(replaced)

In [None]:
### 转换成符合输入数据格式
import json
import os, sys, glob, shutil, json
import  matplotlib.pyplot as plt
from PIL import Image
from lxml.etree import Element, SubElement, tostring
import pprint
from xml.dom.minidom import parseString
from tqdm import tqdm
from xml.etree import ElementTree as ET 


def xml_make(train_imgname, train_label, train_left, train_top, train_width, train_height,
             img_dir="mnt/mchar_train/", xml_dir="/mnt/train_xml/"):
    """Write a VOC-style XML annotation file for one image.

    The XML holds the file name, the image size (read from the image file,
    depth fixed to 3) and one ``object`` element per bounding box.

    Args:
        train_imgname: Image file name, e.g. ``'000000.png'``.
        train_label: Sequence of labels, one per box.
        train_left: Sequence of left (xmin) coordinates, one per box.
        train_top: Sequence of top (ymin) coordinates, one per box.
        train_width: Sequence of box widths, one per box.
        train_height: Sequence of box heights, one per box.
        img_dir: Directory containing the image. Defaults to the training
            image directory that used to be hard-coded.
        xml_dir: Directory the XML is written to (created if missing).
            Defaults to the training XML directory that used to be
            hard-coded.
    """
    node_root = Element('annotation')

    node_filename = SubElement(node_root, 'filename')
    node_filename.text = str(train_imgname)

    # Only the dimensions are needed, so read them and close the file
    # instead of decoding the whole image and leaving the handle open.
    with Image.open(os.path.join(img_dir, str(train_imgname))) as img:
        width, height = img.size

    node_size = SubElement(node_root, 'size')
    node_width = SubElement(node_size, 'width')
    node_width.text = str(width)

    node_height = SubElement(node_size, 'height')
    node_height.text = str(height)

    node_depth = SubElement(node_size, 'depth')
    node_depth.text = '3'

    boxes = zip(train_label, train_left, train_top, train_width, train_height)
    for label, left, top, box_width, box_height in boxes:
        node_object = SubElement(node_root, 'object')
        node_name = SubElement(node_object, 'name')
        node_name.text = str(label)
        node_difficult = SubElement(node_object, 'difficult')
        node_difficult.text = '0'
        node_bndbox = SubElement(node_object, 'bndbox')
        node_xmin = SubElement(node_bndbox, 'xmin')
        node_xmin.text = str(int(left))
        node_ymin = SubElement(node_bndbox, 'ymin')
        node_ymin.text = str(int(top))
        node_xmax = SubElement(node_bndbox, 'xmax')
        node_xmax.text = str(int(left + box_width))
        node_ymax = SubElement(node_bndbox, 'ymax')
        node_ymax.text = str(int(top + box_height))

    os.makedirs(xml_dir, exist_ok=True)
    savename = os.path.join(xml_dir, train_imgname.split('.')[0] + '.xml')
    tree = ET.ElementTree(node_root)
    tree.write(savename)


# --- training split -------------------------------------------------------
with open(r'content/drive/My Drive/mchar_train.json') as json_file:
    train_json = json.load(json_file)
train_imgname = list(train_json.keys())
train_label = [train_json[x]['label'] for x in train_json]
train_height = [train_json[x]['height'] for x in train_json]
train_left = [train_json[x]['left'] for x in train_json]
train_top = [train_json[x]['top'] for x in train_json]
train_width = [train_json[x]['width'] for x in train_json]

for i in tqdm(range(len(train_imgname))):
    xml_make(train_imgname[i], train_label[i], train_left[i], train_top[i],
             train_width[i], train_height[i])

# --- validation split -----------------------------------------------------
with open(r'content/drive/My Drive/mchar_val.json') as json_file:
    val_json = json.load(json_file)
val_imgname = list(val_json.keys())
val_label = [val_json[x]['label'] for x in val_json]
val_height = [val_json[x]['height'] for x in val_json]
val_left = [val_json[x]['left'] for x in val_json]
val_top = [val_json[x]['top'] for x in val_json]
val_width = [val_json[x]['width'] for x in val_json]

# The validation images and XMLs get their own directories. Previously this
# loop reused the hard-coded training paths, so sizes were read from the
# training images and the training XML files were overwritten.
# NOTE(review): '/mnt/val_xml/' matches the path convert_annotation() reads;
# the old commented-out code used the relative 'mnt/val_xml/' -- confirm.
for i in tqdm(range(len(val_imgname))):
    xml_make(val_imgname[i], val_label[i], val_left[i], val_top[i],
             val_width[i], val_height[i],
             img_dir="mnt/mchar_val/", xml_dir="/mnt/val_xml/")

In [None]:
import os
import random
random.seed(912)
trainval_percent = 1
train_percent = 1

# Training set / validation set: remember to save the output after converting.
# Alternatively, merge the train and val XML files into a single folder and
# split 0.8 / 0.2 to enlarge the original training set.
# xmlfilepath = '/mnt/all_xml/'
xmlfilepath = '/mnt/train_xml/'
# xmlfilepath = '/mnt/val_xml/'
total_xml = os.listdir(xmlfilepath)
num = len(total_xml)
# Named `indices` rather than `list`: rebinding the builtin broke every later
# `list(...)` call in the same session.
indices = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)

# Sets make the per-file membership tests O(1) instead of O(n).
trainval_lookup = set(trainval)
train_lookup = set(train)

os.makedirs('/mnt/ImgSet/', exist_ok=True)

# NOTE(review): the routing is kept exactly as in the original script --
# indices sampled into `train` are written to test.txt, and indices outside
# `trainval` are written to train.txt. With both percentages at 1 every name
# ends up in trainval.txt and test.txt. Confirm this is intended.
with open('/mnt/ImgSet/trainval.txt', 'w') as ftrainval, \
        open('/mnt/ImgSet/test.txt', 'w') as ftest, \
        open('/mnt/ImgSet/train.txt', 'w') as ftrain, \
        open('/mnt/ImgSet/val.txt', 'w') as fval:
    for i in indices:
        # Strip the 4-character extension ('.xml') from the file name.
        name = total_xml[i][:-4] + '\n'
        if i in trainval_lookup:
            ftrainval.write(name)
            if i in train_lookup:
                ftest.write(name)
            else:
                fval.write(name)
        else:
            ftrain.write(name)

In [None]:
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

# Image-set split(s) to process; swap in the commented line for training.
sets = ['val']
# sets = ['train']
# Class names in label-index order: index 0 is '1', ..., index 9 is '0'.
classes = [str(digit) for digit in range(1, 10)] + ['0']


def convert(size, box, image_id=None):
    """Convert a pixel box to normalised ``(x_center, y_center, w, h)``.

    Each of the four values is clamped to ``[0, 1]``; whenever a value has to
    be clamped, the image identifier is printed once for that value.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.
        image_id: Optional identifier to print when clamping. If omitted, a
            module-level ``image_id`` is used when one exists (the original
            behaviour), otherwise ``None`` is printed instead of raising
            ``NameError``.

    Returns:
        Tuple ``(x, y, w, h)``. Clamped entries are the ints ``0`` or ``1``.
    """
    if image_id is None:
        image_id = globals().get('image_id')

    def clamp(value):
        # Report the offending image and pin the value to the valid range.
        if value < 0:
            print(image_id)
            return 0
        if value > 1:
            print(image_id)
            return 1
        return value

    dw = 1. / size[0]
    dh = 1. / size[1]
    # Evaluation order x, w, y, h matches the original print order.
    x = clamp((box[0] + box[1]) / 2.0 * dw)
    w = clamp((box[1] - box[0]) * dw)
    y = clamp((box[2] + box[3]) / 2.0 * dh)
    h = clamp((box[3] - box[2]) * dh)
    return (x, y, w, h)
    


def convert_annotation(image_id):
    """Convert one XML annotation into a label text file.

    Reads ``/mnt/val_xml/<image_id>.xml`` and writes one line per kept object
    to ``/mnt/val_labels/<image_id>.txt`` in the form
    ``<class index> <x> <y> <w> <h>``, where the four box values come from
    ``convert``. Objects whose name is not in ``classes`` or whose
    ``difficult`` flag is 1 are skipped.

    Args:
        image_id: Base name (without extension) of the annotation file.
    """
    # in_path = '/mnt/train_xml/%s.xml' % (image_id)
    # out_path = '/mnt/train_labels/%s.txt' % (image_id)
    in_path = '/mnt/val_xml/%s.xml' % (image_id)
    out_path = '/mnt/val_labels/%s.txt' % (image_id)

    with open(in_path) as in_file:
        root = ET.parse(in_file).getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # Make sure the directory actually written to exists, and close the
    # output file deterministically (both handles used to leak).
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'w') as out_file:
        for obj in root.iter('object'):
            difficult = obj.find('difficult').text
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            # convert() expects (xmin, xmax, ymin, ymax).
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


wd = getcwd()
for image_set in sets:
    # os.makedirs('/mnt/train_labels/', exist_ok=True)
    # convert_annotation() writes to the absolute '/mnt/val_labels/', so that
    # is the directory that has to exist (the original created the relative
    # 'mnt/val_labels/' instead).
    os.makedirs('/mnt/val_labels/', exist_ok=True)

    with open('/content/drive/My Drive/TianchiCV/Image_Set/%s.txt' % (image_set)) as id_file:
        image_ids = id_file.read().strip().split()

    # One image path per line, plus one label file per image.
    with open('mnt/%s.txt' % (image_set), 'w') as list_file:
        for image_id in tqdm(image_ids):
            list_file.write('mnt/mchar_val/%06s.png\n' % (image_id))
            # list_file.write('/mnt/mchar_train/%06s.png\n' % (image_id))
            convert_annotation(image_id)


In [None]:
##安装所需环境
!pip install -U -r /content/drive/My\ Drive/yolov5-master/yolov5-master/requirements.txt
## 开启multi-scale  输入尺度320 
!cd /content/drive/My\ Drive/yolov5-master/yolov5-master && python train.py  --batch-size 16  --cache-images #--multi-scale
#只输出结果的txt,没有输出图像(可在detect.py 130 处自行注释语句)
!cd /content/drive/My\ Drive/yolov5-master/yolov5-master && python detect.py  --weights 模型位置  --output 输出位置--save-txt --img-size 192 \ ## 192 第一次infer 0.93+   --iou-thres 值为 0.35
#--augment 使用数据增强时建议提高阈值 

In [None]:
## 将txt中的结果按识别出的字符按从左到右的顺序排序，保存到提交文件
import os 
import numpy as np
import pandas as pd
from tqdm import tqdm
file_path = r'mnt/r//'
txt_file = sorted(os.listdir(file_path))
result = pd.read_csv(r'/content/drive/My Drive/mchar_sample_submit_A.csv')
for name in tqdm(txt_file):
    # Only detection result files are of interest.
    if name[-3:] != 'txt':
        continue

    # A file with a single detection loads as a 1-D array; promote it to 2-D
    # so single- and multi-detection files share one code path.
    txt = np.atleast_2d(np.loadtxt(file_path + name))
    if txt.size == 0:
        # Empty result file: nothing to decode, leave the row untouched.
        continue

    # Order detections by column 1 (presumably the x-centre in YOLO's
    # `class x y w h` layout, i.e. left to right -- confirm against detect.py).
    order = np.argsort(txt[:, 1], kind='stable')

    # Class index k stands for digit k + 1, except index 9 which stands for 0.
    # Plain int() replaces np.int, which current NumPy releases no longer have.
    t = ''.join(str((int(cls_index) + 1) % 10) for cls_index in txt[order, 0])
    result.loc[result.file_name == name[:-3] + 'png', 'file_code'] = t
result.to_csv(r'/content/drive/My Drive/YOLO5_result.csv', index=None)
result.head()