"""
    machine: laptop
    this script creates yolo training data with IAM handwritten official split
    Aim is to train yolov3 with this split and use the same split for training PHOSCNET
    
    So the data is created in 2 ways from IAM official split.
    
    1. Line level annotation is taken and only lines which belongs to training data are
    considered. Here training data file contains train as well as test file.
    
    The following files are created.
    "mnist_train_PhoscIamOfficialSplitForYoloLineLevel1.txt"
    "mnist_test_PhoscIamOfficialSplitForYoloLineLevel1.txt"

    
    2. All lines which are present on a training data page are taken, so in this case a few lines
    may not be present in the IAM official split but will still be considered for YOLOv3 localization.
    
    
    
 

"""

In [1]:
import os
# Folder that holds the official IAM split lists, and the four list files in it.
splitFilePath = "./data/largeWriterIndependentTextLineRecognitionTask"

#os.listdir(splitFilePath)

trainFile, testFile, validFile1, validFile2 = (
    os.path.join(splitFilePath, splitName)
    for splitName in (
        "trainset.txt",
        "testset.txt",
        "validationset1.txt",
        "validationset2.txt",
    )
)

"""
    The function below takes the line ids of one split and finds the original form
    image each of them belongs to; this image-level split is used to decide the
    YOLO train, test and validation data.
    
"""
def gatherFormInOfficialSplitForYolo(temp,formsPath="/home/aniketag/Documents/phd/yolov5/data/datasets/forms//"):
    """
        Map the line ids of one split to the form (page) images they come from.

        A line id is expected to look like "a01-000u-00": everything in front of
        the second "-" is taken as the form name and "<form name>.png" is looked
        up inside formsPath.

        temp      : list of line-id strings, e.g. the result of readlines();
                    the ids are kept exactly as given (trailing newline included)
        formsPath : folder with the form png files; the default is the folder
                    this function always used

        returns (gatherList, allLines)
            gatherList : unique png names derived from temp that exist in formsPath
            allLines   : unique entries of temp
        Both lists are built from sets, so their order is not defined.
    """

    foundForms=set()

    for lineDetail in temp:

        parts=lineDetail.split("-")

        # fewer than two "-" means no form name can be derived from this entry
        if len(parts)<3:
            continue

        pngName="-".join(parts[:2])+".png"

        if os.path.isfile(os.path.join(formsPath,pngName)):
            foundForms.add(pngName)

    gatherList=list(foundForms)
    allLines=list(set(temp))
    print(" total png gathered:",len(gatherList)," original lines:",len(temp),"unique lines:",len(allLines))

    return gatherList,allLines



In [2]:
def _readSplitFile(splitPath):
    """
        return every line of one split list file, newline characters included
    """
    with open(splitPath) as splitHandle:
        return splitHandle.readlines()


# training split: read it, resolve its forms and report the sizes
trainSet=_readSplitFile(trainFile)
trainList,trainLines=gatherFormInOfficialSplitForYolo(trainSet)

print(" train len:",len(trainList))
print(" trainLines len:",len(trainLines))

# test split and the two validation splits
testSet=_readSplitFile(testFile)
testList,testLines=gatherFormInOfficialSplitForYolo(testSet)

validSet1=_readSplitFile(validFile1)
validList1,valLines1=gatherFormInOfficialSplitForYolo(validSet1)

validSet2=_readSplitFile(validFile2)
validList2,valLines2=gatherFormInOfficialSplitForYolo(validSet2)

# the two validation sets are used as one
validList=[*validList1,*validList2]
valLines=[*valLines1,*valLines2]

for sizeLabel,entries in (
    (" test len:",testList),
    (" testLines len:",testLines),
    (" valid len:",validList),
    (" valLines len:",valLines),
):
    print(sizeLabel,len(entries))


 total png gathered: 747  original lines: 6161 unique lines: 6161
 train len: 747
 trainLines len: 6161
 total png gathered: 232  original lines: 1861 unique lines: 1861
 total png gathered: 105  original lines: 900 unique lines: 900
 total png gathered: 115  original lines: 940 unique lines: 940
 test len: 232
 testLines len: 1861
 valid len: 220
 valLines len: 1840


In [3]:
"""
    checking - count to create a rule
"""

# For every character of every training line id, record the running "-" tally
# of that id.  The tally starts at -1 for each id and is stored once per
# character, not once per id.
counts=[]
for lineId in trainSet:

    dashTally=-1

    for symbol in lineId:
        dashTally=dashTally+1 if symbol=="-" else dashTally
        counts.append(dashTally)

# largest tally seen anywhere, never below 0
maxCount=max([0]+counts)

print("maxCount:",maxCount," min count:",min(counts))

maxCount: 1  min count: -1


"""

    The lists above give the files in the train, test and validation splits.
    
    The next step is to create the annotations for them.

"""

In [4]:
# (number of training line ids, number of distinct training line ids)
len(trainSet),len(set(trainSet))
#trainSet

(6161, 6161)

In [5]:

import pandas as pd

# annotation table used for the boxes; an earlier variant of this cell read
# "./data/data3_phosc.csv" instead
annotationCsv="./data/data_14_april.csv"
df=pd.read_csv(annotationCsv)

# distinct form images referenced by the table
unqImages=set(df["image_name"])
print("\n\t unique:",len(unqImages))
#print(data.columns)


	 unique: 1539


In [16]:
# show the first 100 annotation rows
df.head(n=100)

Unnamed: 0,image_name,class,width,height,org_x1,org_y1,org_x2,org_y2,text,cropName,x,y,w,h
0,g06-031n.png,1,2479,3542,435,678,450,697,',g06-031n-00-00,0.18,0.19,0.01,0.01
1,g06-031n.png,1,2479,3542,425,683,663,777,What,g06-031n-00-01,0.22,0.21,0.10,0.03
2,g06-031n.png,1,2479,3542,640,726,696,779,a,g06-031n-00-02,0.27,0.21,0.02,0.01
3,g06-031n.png,1,2479,3542,727,685,1128,820,frightful,g06-031n-00-03,0.37,0.21,0.16,0.04
4,g06-031n.png,1,2479,3542,1129,692,1365,779,event,g06-031n-00-04,0.50,0.21,0.10,0.02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,h07-054a.png,1,500,400,1097,853,1255,929,until,h07-054a-01-02,2.35,2.23,0.32,0.19
96,h07-054a.png,1,500,400,1313,845,1474,948,quite,h07-054a-01-03,2.79,2.24,0.32,0.26
97,h07-054a.png,1,500,400,1512,843,1773,938,recently,h07-054a-01-04,3.29,2.23,0.52,0.24
98,h07-054a.png,1,500,400,1797,838,2022,923,shown,h07-054a-01-05,3.82,2.20,0.45,0.21


In [20]:
# number of forms in the train, validation and test lists
tuple(len(forms) for forms in (trainList,validList,testList))

(747, 220, 232)

In [25]:
#for file in ['train','test']:
# Writes the YOLO annotation files for the official split.
#
# Every row of df contributes one box "org_x1,org_y1,org_x2,org_y2,0" to the
# form named in its image_name column.  Forms listed in trainList or validList
# go to labels_txt, forms listed in testList go to labels_txt2 and all other
# rows are ignored.  Each form becomes one text line:
#     <images_path><image_name> x1,y1,x2,y2,0 x1,y1,x2,y2,0 ...
#
# The boxes are collected per form first and written after the loop.  Flushing
# a line whenever the image name changes would drop the first box of every
# form, never write the last form, could flush a test form into the train file
# (or the reverse) and would mix up forms whose rows are not contiguous in df.
import numpy as np
import os

allImagesPath="/home/aniketag/Documents/phd/yolov5/data/datasets/forms//"
allImages=os.listdir(allImagesPath)
images_num=len(allImages)
images_path = "./data/datasets/forms/"   # prefix written in front of every image name

labels_txt =os.path.join(os.getcwd(),"mnist","mnist_train_PhoscIamOfficialSplitForYoloAllLineLevel1.txt")
labels_txt2 =os.path.join(os.getcwd(),"mnist","mnist_test_PhoscIamOfficialSplitForYoloAllLineLevel1.txt")

boxColumns=("org_x1","org_y1","org_x2","org_y2")

# sets make the per-row split lookup O(1) instead of scanning the lists
trainValForms=set(trainList)|set(validList)
testForms=set(testList)

trainValCount,testCount=0,0
exceptionCount=0
unqImg=[]            # image names in order of first appearance
seenImages=set()     # same content as unqImg, for fast membership tests
trainValBoxes={}     # image name -> box strings, train + validation forms
testBoxes={}         # image name -> box strings, test forms

for indx,info in df.iterrows():

    imgName=info["image_name"]

    if indx%100==0:
        print("\n\t unq images:",len(unqImg))

    # train/validation wins when a form is listed there and in the test list
    if imgName in trainValForms:
        trainValCount+=1
        boxesPerImage=trainValBoxes
    elif imgName in testForms:
        testCount+=1
        boxesPerImage=testBoxes
    else:
        boxesPerImage=None

    if boxesPerImage is not None:

        if indx%100==0:
            print("\n\t path:",images_path+imgName)

        try:
            box=[str(int(info[column])) for column in boxColumns]
        except (TypeError,ValueError) as e:
            # a coordinate that cannot be turned into an int (e.g. missing
            # value): report the row and leave it out
            exceptionCount+=1
            print("\n\t exception index:",indx,"\t count:",exceptionCount," e:",e)
        else:
            # trailing "0" is the class id, the same for every box
            boxesPerImage.setdefault(imgName,[]).append(",".join(box+["0"]))

    if imgName not in seenImages:
        seenImages.add(imgName)
        unqImg.append(imgName)


def _appendAnnotationLines(labelPath,boxesPerImage):
    """
        append one "<path> box box ..." line per image to labelPath

        The file is opened in append mode, so running the cell twice adds
        every line twice.
    """
    with open(labelPath,"a") as wf:
        wf.writelines(
            images_path+name+" "+" ".join(boxes)+"\n"
            for name,boxes in boxesPerImage.items()
        )


_appendAnnotationLines(labels_txt,trainValBoxes)
_appendAnnotationLines(labels_txt2,testBoxes)

print(" trainValCount:",trainValCount," testCount:",testCount)


	 unq images: 0

	 unq images: 2

	 unq images: 3

	 path: ./data/datasets/forms/n06-119.png

	 unq images: 5

	 path: ./data/datasets/forms/e06-006.png

	 unq images: 6

	 path: ./data/datasets/forms/g06-018i.png

	 unq images: 7

	 path: ./data/datasets/forms/a03-009.png

	 unq images: 9

	 path: ./data/datasets/forms/g06-018f.png

	 unq images: 10

	 path: ./data/datasets/forms/g04-092.png

	 unq images: 11

	 path: ./data/datasets/forms/d06-086.png

	 unq images: 12

	 path: ./data/datasets/forms/c06-080.png

	 unq images: 13

	 path: ./data/datasets/forms/f04-087.png

	 unq images: 15

	 path: ./data/datasets/forms/d04-121.png

	 unq images: 16

	 path: ./data/datasets/forms/e07-079.png

	 unq images: 18

	 path: ./data/datasets/forms/c03-081c.png

	 unq images: 19

	 unq images: 20

	 path: ./data/datasets/forms/d06-063.png

	 unq images: 22

	 path: ./data/datasets/forms/a01-011u.png

	 unq images: 23

	 path: ./data/datasets/forms/g04-072.png

	 unq images: 24

	 unq images: 2


	 unq images: 237

	 path: ./data/datasets/forms/n04-218.png

	 unq images: 238

	 path: ./data/datasets/forms/e06-030.png

	 unq images: 239

	 unq images: 241

	 path: ./data/datasets/forms/p06-047.png

	 unq images: 242

	 path: ./data/datasets/forms/g01-037.png

	 unq images: 243

	 path: ./data/datasets/forms/g06-037g.png

	 unq images: 245

	 path: ./data/datasets/forms/f04-093.png

	 unq images: 246

	 path: ./data/datasets/forms/f07-081b.png

	 unq images: 247

	 path: ./data/datasets/forms/a01-003.png

	 unq images: 249

	 path: ./data/datasets/forms/g06-018m.png

	 unq images: 250

	 path: ./data/datasets/forms/f07-039a.png

	 unq images: 252

	 path: ./data/datasets/forms/d04-066.png

	 unq images: 253

	 unq images: 254

	 unq images: 255

	 path: ./data/datasets/forms/f01-075.png

	 unq images: 257

	 path: ./data/datasets/forms/e04-062.png

	 unq images: 258

	 path: ./data/datasets/forms/g07-000b.png

	 unq images: 259

	 path: ./data/datasets/forms/e07-105.png

	 unq i


	 unq images: 464

	 unq images: 466

	 path: ./data/datasets/forms/m02-112.png

	 unq images: 468

	 path: ./data/datasets/forms/b04-162.png

	 unq images: 469

	 path: ./data/datasets/forms/g06-031g.png

	 unq images: 470

	 path: ./data/datasets/forms/g04-048.png

	 unq images: 472

	 unq images: 473

	 unq images: 474

	 unq images: 476

	 path: ./data/datasets/forms/g06-031l.png

	 unq images: 477

	 path: ./data/datasets/forms/d06-041.png

	 unq images: 478

	 unq images: 480

	 path: ./data/datasets/forms/e04-079.png

	 unq images: 481

	 path: ./data/datasets/forms/m06-048.png

	 unq images: 483

	 unq images: 484

	 path: ./data/datasets/forms/c06-103.png

	 unq images: 486

	 unq images: 487

	 path: ./data/datasets/forms/a01-063x.png

	 unq images: 488

	 path: ./data/datasets/forms/c03-081e.png

	 unq images: 489

	 path: ./data/datasets/forms/g06-050l.png

	 unq images: 491

	 unq images: 492

	 path: ./data/datasets/forms/f01-070.png

	 unq images: 494

	 unq images: 495


	 unq images: 696

	 path: ./data/datasets/forms/m03-110.png

	 unq images: 697

	 path: ./data/datasets/forms/a02-012.png

	 unq images: 698

	 path: ./data/datasets/forms/a02-116.png

	 unq images: 700

	 path: ./data/datasets/forms/d01-118.png

	 unq images: 701

	 unq images: 703

	 path: ./data/datasets/forms/b03-025.png

	 unq images: 704

	 path: ./data/datasets/forms/e06-003.png

	 unq images: 706

	 path: ./data/datasets/forms/g06-011f.png

	 unq images: 707

	 unq images: 708

	 path: ./data/datasets/forms/b02-102.png

	 unq images: 709

	 path: ./data/datasets/forms/c02-012.png

	 unq images: 711

	 path: ./data/datasets/forms/g01-074.png

	 unq images: 712

	 unq images: 714

	 path: ./data/datasets/forms/d07-096.png

	 unq images: 715

	 path: ./data/datasets/forms/r06-137.png

	 unq images: 716

	 unq images: 718

	 path: ./data/datasets/forms/c06-138.png

	 unq images: 719

	 path: ./data/datasets/forms/m04-107.png

	 unq images: 721

	 path: ./data/datasets/forms/p06-0


	 unq images: 920

	 unq images: 921

	 unq images: 922

	 unq images: 923

	 path: ./data/datasets/forms/p03-087.png

	 unq images: 924

	 unq images: 926

	 path: ./data/datasets/forms/a01-049u.png

	 unq images: 927

	 path: ./data/datasets/forms/d01-049.png

	 unq images: 929

	 path: ./data/datasets/forms/r03-030.png

	 unq images: 930

	 path: ./data/datasets/forms/a05-069.png

	 unq images: 931

	 unq images: 933

	 path: ./data/datasets/forms/h04-025.png

	 unq images: 934

	 path: ./data/datasets/forms/e04-026.png

	 unq images: 935

	 path: ./data/datasets/forms/d04-021.png

	 unq images: 936

	 path: ./data/datasets/forms/p01-168.png

	 unq images: 938

	 path: ./data/datasets/forms/e04-091.png

	 unq images: 939

	 path: ./data/datasets/forms/b06-045.png

	 unq images: 941

	 path: ./data/datasets/forms/g06-050f.png

	 unq images: 942

	 path: ./data/datasets/forms/c04-134.png

	 unq images: 943

	 path: ./data/datasets/forms/d07-082.png

	 unq images: 945

	 unq images: 9


	 unq images: 1139

	 path: ./data/datasets/forms/c06-076.png

	 unq images: 1140

	 path: ./data/datasets/forms/c04-089.png

	 unq images: 1141

	 path: ./data/datasets/forms/c02-059.png

	 unq images: 1143

	 unq images: 1144

	 path: ./data/datasets/forms/r02-006.png

	 unq images: 1146

	 path: ./data/datasets/forms/g06-047e.png

	 unq images: 1147

	 path: ./data/datasets/forms/a03-059.png

	 unq images: 1148

	 path: ./data/datasets/forms/g06-018d.png

	 unq images: 1150

	 path: ./data/datasets/forms/g06-045m.png

	 unq images: 1151

	 unq images: 1152

	 path: ./data/datasets/forms/f07-009.png

	 unq images: 1153

	 path: ./data/datasets/forms/m06-031.png

	 unq images: 1155

	 path: ./data/datasets/forms/d01-019.png

	 unq images: 1156

	 unq images: 1157

	 path: ./data/datasets/forms/c02-026.png

	 unq images: 1158

	 path: ./data/datasets/forms/a06-064.png

	 unq images: 1160

	 path: ./data/datasets/forms/n02-004.png

	 unq images: 1161

	 path: ./data/datasets/forms/c03-


	 unq images: 1353

	 unq images: 1355

	 path: ./data/datasets/forms/n06-133.png

	 unq images: 1356

	 path: ./data/datasets/forms/m01-115.png

	 unq images: 1357

	 path: ./data/datasets/forms/f07-024a.png

	 unq images: 1358

	 path: ./data/datasets/forms/c03-096d.png

	 unq images: 1360

	 path: ./data/datasets/forms/p06-242.png

	 unq images: 1361

	 path: ./data/datasets/forms/m06-106.png

	 unq images: 1362

	 path: ./data/datasets/forms/n03-106.png

	 unq images: 1364

	 path: ./data/datasets/forms/p03-103.png

	 unq images: 1365

	 path: ./data/datasets/forms/g06-045k.png

	 unq images: 1367

	 path: ./data/datasets/forms/f02-040.png

	 unq images: 1368

	 path: ./data/datasets/forms/g04-032.png

	 unq images: 1369

	 path: ./data/datasets/forms/b04-060.png

	 unq images: 1370

	 path: ./data/datasets/forms/g04-068.png

	 unq images: 1371

	 unq images: 1373

	 path: ./data/datasets/forms/a04-085.png

	 unq images: 1374

	 path: ./data/datasets/forms/a06-057.png

	 unq image

In [22]:
"""
    image visualization
"""

def drawRect(img,l):
    """
        draw the box l=[x1,y1,x2,y2] on img as a black outline of thickness 5
        and hand back whatever cv2.rectangle returns
    """
    topLeft=(l[0],l[1])
    bottomRight=(l[2],l[3])
    outlineColour,outlineThickness=(0,0,0),5

    return cv2.rectangle(img,topLeft,bottomRight,outlineColour,outlineThickness)



In [23]:
#for file in ['train','test']:
# Debug cell: walks the annotation dataframe and, for forms that are in
# trainList or validList, draws the row's box on the loaded form image and
# saves the result into the annotationVisualize folder.  It waits on input()
# after every drawn box, so it is meant to be stepped through by hand.
# NOTE(review): cv2 is not imported in this cell; it has to be in scope already.
import numpy as np
import os

allImagesPath="/home/aniketag/Documents/phd/yolov5/data/datasets/forms//"
allImages=os.listdir(allImagesPath)
images_num=len(allImages)#len(os.listdir("/home/k/phd/yolov5/data/datasets/forms/"))
images_path = allImagesPath # "/home/k/phd/yolov5/data/datasets/forms/"#os.getcwd()+f"/mnist_{file}"

#labels_txt = os.getcwd()+"/"+"mnist_train1.txt"

# "delMe_" variants of the label files; nothing is written into them below
labels_txt =os.path.join(os.getcwd(),"mnist","delMe_mnist_train_PhoscIamOfficialSplitForYoloAllLineLevel1.txt")
labels_txt2 =os.path.join(os.getcwd(),"mnist","delMe_mnist_test_PhoscIamOfficialSplitForYoloAllLineLevel1.txt")

# NOTE(review): testCount is never incremented in this cell, so the final
# print always reports 0 for it.
trainValCount,testCount=0,0

lastName=""      # image name of the previously handled train/val row
curLine=""       # annotation line being built for the current image
prevLine=""      # last state of curLine that contained at least one box
unqImg=[]        # image names seen so far
exceptionCount=0

for indx,info in df.iterrows():
        
    imgName=info.image_name
    lineName=info.cropName[:-3]+"\n"   # not used further down in this cell
    
    # the image of the very first row is loaded up front, whatever split it is in
    if indx==0:
        image_path =os.path.join(allImagesPath,info["image_name"])
        img=cv2.imread(image_path)
        print(" 1st image:",img.shape)
    
    try:
        if indx%100==0:
            print("\n\t unq images:",len(unqImg))

        if (imgName in trainList or imgName in validList):
            trainValCount+=1
            # opened in append mode for every row although the write below is
            # commented out; the only effect is that the file gets created
            with open(labels_txt, "a") as wf:
                image_path =images_path+info["image_name"]

                if indx%100==0:
                    print("\n\t path:",image_path)

                # first row of an image that has not been seen before
                # NOTE(review): this row's own box is neither added to curLine
                # nor drawn; only the following rows of the image are.
                if info["image_name"]!=lastName and info["image_name"] not in unqImg:
                    curLine=image_path

                    # only once some box has been collected: save the picture
                    # drawn so far under the previous name and load the new one
                    if len(prevLine):
                        #print("\n\t prevLine:",prevLine)
                        #wf.write( image_path+ "\n")
                        #wf.write(prevLine+"\n")
                        
                        
                        cv2.imwrite("/home/aniketag/Documents/phd/TensorFlow-2.x-YOLOv3/delme/annotationVisualize//"+lastName,img)

                        #cv2.imwrite(os.path.join(visFolder,lastName),img)
                        img=None
                        imgPath=os.path.join(allImagesPath,imgName)
                        img=cv2.imread(imgPath)
                        print("new imgName:",imgName," shape:",img.shape)                    

                        #input("1.check")


                else:
                    # further row of the current image: extend the line with
                    # "xmin,ymin,xmax,ymax,0" and draw the box
                    xmin,ymin=str(int(info["org_x1"])),str(int(info["org_y1"]))
                    xmax,ymax=str(int(info["org_x2"])),str(int(info["org_y2"]))
                    curLine += ' ' + ','.join([xmin, ymin, xmax, ymax, str(0)])
                    prevLine=curLine
                    
                    
                    # drawing is best effort: any error here is silently dropped
                    try:
                        img=drawRect(img,[int(xmin),int(ymin),int(xmax),int(ymax)])
                        cv2.imwrite("/home/aniketag/Documents/phd/TensorFlow-2.x-YOLOv3/delme/annotationVisualize//"+imgName,img)
                        input("2.check")   # blocks until the user presses enter

                    except Exception as e:
                        pass

                lastName=info["image_name"]

            
        if info["image_name"] not in unqImg:
            unqImg.append(info["image_name"])

    except Exception as e:
        # any failure for a row is only counted and reported by row index
        exceptionCount+=1
        print("\n\t exception index:",indx,"\t count:",exceptionCount)

        
print(" trainValCount:",trainValCount," testCount:",testCount)

 1st image: (3542, 2479, 3)

	 unq images: 0

	 unq images: 2

	 unq images: 3
2.check1
2.check1
2.check1
2.check1


KeyboardInterrupt: Interrupted by user

"""
    Ignore the part below.

"""

In [7]:
def drawRect(img,l):
    """
        outline the box l=[x1,y1,x2,y2] on img in black, 5 thick, and return
        the result of cv2.rectangle
    """
    x1,y1,x2,y2=l[0],l[1],l[2],l[3]

    return cv2.rectangle(img,(x1,y1),(x2,y2),(0,0,0),5)



"""
    The part below creates the necessary files.
"""

In [15]:
# for file in ['train','test']:
# Debug / visualisation cell: for the first 2000 dataframe rows it draws the
# row's box on the form image, saves the annotated image into the
# annotationVisualize folder and waits on input() after every box.  No label
# file is written here.
import numpy as np
import os
import cv2
import sys

#allImagesPath="/home/aniketag/Documents/phd/yolov5/data/datasets/forms//"
allImagesPath="/home/aniketag/Documents/phd/yolov5/data/datasets/forms//"
allImages=os.listdir(allImagesPath)
images_num=len(allImages)#len(os.listdir("/home/k/phd/yolov5/data/datasets/forms/"))
images_path = allImagesPath # "/home/k/phd/yolov5/data/datasets/forms/"#os.getcwd()+f"/mnist_{file}"
visFolder=os.path.join(os.getcwd(),"delMe","annotationVisualize")


#labels_txt = os.getcwd()+"/"+"mnist_train1.txt"

labels_txt =os.path.join(os.getcwd(),"mnist","mnist_train_PhoscIamOfficialSplitForYoloAllLineLevel1.txt")
labels_txt2 =os.path.join(os.getcwd(),"mnist","mnist_test_PhoscIamOfficialSplitForYoloAllLineLevel1.txt")


#labels_txt = os.getcwd()+"/"+"mnist_train_PhoscIamOfficialSplitForYolo.txt"
#labels_txt2 = os.getcwd()+"/"+"mnist_test_PhoscIamOfficialSplitForYolo.txt"

lastName=""      # image name of the previously handled row
curLine=""       # annotation line being built for the current image
prevLine=""      # last state of curLine that contained at least one box
unqImg=[]        # image names seen so far
exceptionCount=0
notFound=0

for indx,info in df.iterrows():
    
    imgName=info.image_name
    lineName=info.cropName[:-3]+"\n"
    #print(" lineName=",lineName)

    # the image of the very first row is loaded up front
    if indx==0:
        image_path =os.path.join(allImagesPath,info["image_name"])
        img=cv2.imread(image_path)
        print(" 1st image:",img.shape)
        
    # only the first 2000 rows are looked at
    if indx==2000:
        break
    
    try:
        if indx%100==0:
            print("\n\t unq images:",len(unqImg)," indx:",indx)

        #if (imgName in trainList and lineName in trainLines) or (imgName in validList and lineName in valLines):# or imgName in validList:

        # "if 1" switches the split filter off: every row takes this branch and
        # the elif / else branches below are not reached
        if 1:
   
            # NOTE(review): image_path is only assigned for row 0 in this
            # branch, so this print and curLine keep showing the first image
            if indx%100==0:
                print("\n\t path:",image_path)
                    
            
            # first row of an image that has not been seen before
            # NOTE(review): this row's own box is not drawn, only the
            # following rows of the image are
            if info["image_name"]!=lastName and info["image_name"] not in unqImg:
                curLine=image_path

                # nothing to save before the first image: lastName is still ""
                # and the target would be a path without a file name
                if len(lastName):
                    cv2.imwrite("/home/aniketag/Documents/phd/TensorFlow-2.x-YOLOv3/delme/annotationVisualize//"+lastName,img)

                #cv2.imwrite(os.path.join(visFolder,lastName),img)
                img=None
                imgPath=os.path.join(allImagesPath,imgName)
                img=cv2.imread(imgPath)
                print("new imgName:",imgName," shape:",img.shape)                    

                #input("1.check")

            else:

                # no need to write in file now, just keep adding to the line
                xmin,ymin=str(int(info["org_x1"])),str(int(info["org_y1"]))
                xmax,ymax=str(int(info["org_x2"])),str(int(info["org_y2"]))
                curLine += ' ' + ','.join([xmin, ymin, xmax, ymax, str(0)])
                prevLine=curLine
                                    
                # drawing stays best effort, but a failure is reported
                try:
                    img=drawRect(img,[int(xmin),int(ymin),int(xmax),int(ymax)])
                    cv2.imwrite("/home/aniketag/Documents/phd/TensorFlow-2.x-YOLOv3/delme/annotationVisualize//"+imgName,img)
                    input("1.check")   # blocks until the user presses enter

                except Exception as e:
                    print("\n\t draw failed index:",indx," e:",e)


            lastName=info["image_name"]

        #elif imgName in testList and lineName in testLines:
        elif imgName in testList:

            image_path =os.path.join("./data/datasets/forms/",info["image_name"])

            if indx%100==0:
                print("\n\t path:",image_path)

            if info["image_name"]!=lastName:
                curLine=image_path

                if len(prevLine):
                    #print("\n\t prevLine:",prevLine)
                    #wf.write( image_path+ "\n")
                    #cv2.imwrite(os.path.join(visFolder,lastName),img)

                    cv2.imwrite("/home/aniketag/Documents/phd/TensorFlow-2.x-YOLOv3/delme/annotationVisualize//"+lastName,img)
                    imgPath=os.path.join(allImagesPath,imgName)
                    img=None
                    img=cv2.imread(imgPath)
                    #input("2.check")

            else:
                xmin,ymin=str(int(info["org_x1"])),str(int(info["org_y1"]))
                xmax,ymax=str(int(info["org_x2"])),str(int(info["org_y2"]))
                curLine += ' ' + ','.join([xmin, ymin, xmax, ymax, str(0)])
                prevLine=curLine

                try:
                    img=drawRect(img,[int(xmin),int(ymin),int(xmax),int(ymax)])
                    cv2.imwrite("/home/aniketag/Documents/phd/TensorFlow-2.x-YOLOv3/delme/annotationVisualize//"+imgName,img)
                    input("2.check")

                except Exception as e:
                    print("\n\t draw failed index:",indx," e:",e)
                    
            lastName=info["image_name"]
        else:
            notFound+=1
            #break        
        
        if info["image_name"] not in unqImg:
            unqImg.append(info["image_name"])

    except Exception as e:
        # report the failing row together with the line number of the error
        exceptionCount+=1
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        #print(exc_type, fname, exc_tb.tb_lineno)
        
        print("\n\t exception index:",indx,"\t count:",exceptionCount," e:",e," line no:",exc_tb.tb_lineno)
    
        
print(" notFound:",notFound)

 1st image: (3542, 2479, 3)

	 unq images: 0  indx: 0

	 unq images: 2  indx: 100
2.check1
2.check1


KeyboardInterrupt: Interrupted by user

In [None]:
# for file in ['train','test']:
# Combined cell: for the first 2000 dataframe rows it appends annotation lines
# to the train/val and test label files and, at the same time, draws the boxes
# on the form images and saves them into the annotationVisualize folder.  It
# waits on input() after every drawn box.
# NOTE(review): a line is only written when the next new image starts, so the
# line of the last image handled is never written.
# NOTE(review): prevLine and lastName are shared by the train/val and the test
# branch, so the line flushed into one file can belong to an image of the
# other split.
import numpy as np
import os
import cv2
#allImagesPath="/home/aniketag/Documents/phd/yolov5/data/datasets/forms//"
allImagesPath="/home/aniketag/Documents/phd/yolov5/data/datasets/forms//"
allImages=os.listdir(allImagesPath)
images_num=len(allImages)#len(os.listdir("/home/k/phd/yolov5/data/datasets/forms/"))
images_path = allImagesPath # "/home/k/phd/yolov5/data/datasets/forms/"#os.getcwd()+f"/mnist_{file}"
visFolder=os.path.join(os.getcwd(),"delMe","annotationVisualize")


#labels_txt = os.getcwd()+"/"+"mnist_train1.txt"

# label file for train + validation forms and label file for test forms
labels_txt =os.path.join(os.getcwd(),"mnist","mnist_train_PhoscIamOfficialSplitForYoloAllLineLevel1.txt")
labels_txt2 =os.path.join(os.getcwd(),"mnist","mnist_test_PhoscIamOfficialSplitForYoloAllLineLevel1.txt")


#labels_txt = os.getcwd()+"/"+"mnist_train_PhoscIamOfficialSplitForYolo.txt"
#labels_txt2 = os.getcwd()+"/"+"mnist_test_PhoscIamOfficialSplitForYolo.txt"

lastName=""      # image name of the previously handled row
curLine=""       # annotation line being built for the current image
prevLine=""      # last state of curLine that contained at least one box
unqImg=[]        # image names seen so far
exceptionCount=0
notFound=0       # rows whose image is in none of the three lists

for indx,info in df.iterrows():
    
    imgName=info.image_name
    lineName=info.cropName[:-3]+"\n"   # only used by the commented-out filter below
    #print(" lineName=",lineName)
    
    # the image of the very first row is loaded up front, whatever split it is in
    if indx==0:
        image_path =os.path.join(allImagesPath,info["image_name"])
        img=cv2.imread(image_path)
        print(" 1st image:",img.shape)
    
    # only the first 2000 rows are processed
    if indx==2000:
        break
    
    try:
        if indx%100==0:
            print("\n\t unq images:",len(unqImg))

        #if (imgName in trainList and lineName in trainLines) or (imgName in validList and lineName in valLines):# or imgName in validList:

        if (imgName in trainList or imgName in validList): # or imgName in validList:
   
            # NOTE(review): image_path still holds the value of an earlier row
            # here; it is reassigned for this row a few lines further down
            if indx%100==0:
                print("\n\t path:",image_path)
            
            
            # the label file is reopened in append mode for every row
            with open(labels_txt, "a") as wf:
                image_path =os.path.join("./data/datasets/forms/",info["image_name"])

                """
                    below condition indicates different image present on current and previous
                    line of dataframe, So need to write content for prev line
                """                
                
                # NOTE(review): the box of this first row of the new image is
                # neither added to curLine nor drawn
                if info["image_name"]!=lastName and info["image_name"] not in unqImg:
                    curLine=image_path

                    # flush the finished line, save the picture drawn so far
                    # under the previous name and load the new image
                    if len(prevLine):
                        #print("\n\t prevLine:",prevLine)
                        #wf.write( image_path+ "\n")
                        wf.write(prevLine+"\n")
                    
                        cv2.imwrite("/home/aniketag/Documents/phd/TensorFlow-2.x-YOLOv3/delme/annotationVisualize//"+lastName,img)
                        
                        #cv2.imwrite(os.path.join(visFolder,lastName),img)
                        img=None
                        imgPath=os.path.join(allImagesPath,imgName)
                        img=cv2.imread(imgPath)
                        print("new imgName:",imgName," shape:",img.shape)                    
                    
                        #input("1.check")

                    
                else:
                    
                    """
                        no need to write in file now just keep adding in line
                    """
                    
                    # extend the line with "xmin,ymin,xmax,ymax,0"
                    xmin,ymin=str(int(info["org_x1"])),str(int(info["org_y1"]))
                    xmax,ymax=str(int(info["org_x2"])),str(int(info["org_y2"]))
                    curLine += ' ' + ','.join([xmin, ymin, xmax, ymax, str(0)])
                    prevLine=curLine
                                        
                    # drawing is best effort: any error here is silently dropped
                    try:
                        img=drawRect(img,[int(xmin),int(ymin),int(xmax),int(ymax)])
                        cv2.imwrite("/home/aniketag/Documents/phd/TensorFlow-2.x-YOLOv3/delme/annotationVisualize//"+imgName,img)
                        input("1.check")   # blocks until the user presses enter

                    except Exception as e:
                        pass


                lastName=info["image_name"]

        #elif imgName in testList and lineName in testLines:
        elif imgName in testList:

            # same scheme for test forms, written to the second label file;
            # unlike the branch above, a new image is detected by the name
            # change alone (no unqImg check)
            with open(labels_txt2, "a") as wf:
                image_path =os.path.join("./data/datasets/forms/",info["image_name"])

                if indx%100==0:
                    print("\n\t path:",image_path)

                if info["image_name"]!=lastName:
                    curLine=image_path

                    if len(prevLine):
                        #print("\n\t prevLine:",prevLine)
                        #wf.write( image_path+ "\n")
                        wf.write(prevLine+"\n")
                        #cv2.imwrite(os.path.join(visFolder,lastName),img)

                        cv2.imwrite("/home/aniketag/Documents/phd/TensorFlow-2.x-YOLOv3/delme/annotationVisualize//"+lastName,img)
                        imgPath=os.path.join(allImagesPath,imgName)
                        img=None
                        img=cv2.imread(imgPath)
                        #input("2.check")

                else:
                    xmin,ymin=str(int(info["org_x1"])),str(int(info["org_y1"]))
                    xmax,ymax=str(int(info["org_x2"])),str(int(info["org_y2"]))
                    curLine += ' ' + ','.join([xmin, ymin, xmax, ymax, str(0)])
                    prevLine=curLine

                    try:
                        img=drawRect(img,[int(xmin),int(ymin),int(xmax),int(ymax)])
                        cv2.imwrite("/home/aniketag/Documents/phd/TensorFlow-2.x-YOLOv3/delme/annotationVisualize//"+imgName,img)
                        input("2.check")   # blocks until the user presses enter

                    except Exception as e:
                        pass                    
                    
                lastName=info["image_name"]
        else:
            notFound+=1
            #break        
        
        if info["image_name"] not in unqImg:
            unqImg.append(info["image_name"])

    except Exception as e:
        # any failure for a row is only counted and reported by row index
        exceptionCount+=1
        print("\n\t exception index:",indx,"\t count:",exceptionCount)
    
        
print(" notFound:",notFound)

In [None]:
# is this particular line id (newline included) among the training lines?
wantedLine="a03-047-05\n"
wantedLine in trainLines