## Training data Printer

In [1]:
import pandas as pd
import os, glob, shutil
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from tqdm import tqdm
import datetime
import cv2

from IPython.display import display
from IPython.display import Image
from validation_library import FAULT_NAME
print("Setup Complete")

import pymongo
from pprint import pprint
import json
from bson.objectid import ObjectId
print("Mongo db load complete")

#### Helper Functions

def barplot_distribution(input_df, col='filename', fig_dw=32, fig_dh=8, title="distribution", table=True, dtype='str'):
    """Bar-plot how many rows of ``input_df`` fall under each distinct value of ``col``.

    Args:
        input_df: DataFrame to summarise; it is grouped by ``col``.
        col: Name of the column to group and count.
        fig_dw: Figure width passed to ``plt.figure(figsize=...)``.
        fig_dh: Figure height passed to ``plt.figure(figsize=...)``.
        title: Suffix of the plot title; the full title is ``"<col> <title>"``.
        table: When true, also print the title and the per-group counts.
        dtype: ``'str'`` reformats the tick labels (``","`` becomes a blank
            line, ``" "`` a line break) so long labels wrap; any other value
            uses the group keys unchanged.

    Returns:
        None. The chart is drawn on a newly created matplotlib figure.
    """
    title = "{} {}".format(col, title)

    # Group once and reuse the result for the table, the tick labels and the bar heights.
    counts = input_df.groupby(col)[col].count()

    if table:
        print(title, "------------------")
        print(counts)

    plt.figure(figsize=(fig_dw, fig_dh))
    plt.title(title)

    if dtype == 'str':
        # str() keeps string keys untouched and stops non-string keys (ints, dates)
        # from failing on the missing .replace attribute.
        x_values = [str(key).replace(",", "\n\n").replace(" ", "\n") for key in counts.index]
    else:
        x_values = list(counts.index)

    sns.barplot(x=x_values, y=counts.values)

    
def reload_mylabelcol(mydb, mylabel_col, collection_name):
    """Drop ``collection_name`` from ``mydb`` and print one document from it afterwards.

    Args:
        mydb: Database-like object indexable by collection name.
        mylabel_col: Never read; kept only so existing calls keep working.
        collection_name: Name of the collection to drop.

    Returns:
        None. The result of ``find_one()`` on the re-fetched collection is printed.
    """
    # Look the collection up by name and drop it ...
    mydb[collection_name].drop()
    # ... then fetch a fresh handle and show what it contains.
    print(mydb[collection_name].find_one())

def createDirectory(path):
    """Make sure ``path`` exists as a directory and return ``path`` unchanged.

    A missing directory is created together with any missing parents and a
    "Creating <path>" line is printed. An existing directory is left alone and
    nothing is printed.
    """
    if os.path.isdir(path):
        return path

    os.makedirs(path)
    print ("Creating {}".format(path))
    return path

Setup Complete
Mongo db load complete


### MongoDB

In [2]:
# ---- Settings -------------------------------------------------------------
db_name = "Aoi_Boards-00"
board_cname = "Test"               # collection the board records are read from
filterdb_cname = "June18_Detail"   # collection handed to the export step as its log

# ---- Connection -----------------------------------------------------------
# Local server by default; the hosted-cluster alternative is kept for reference.
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
#myclient = pymongo.MongoClient("mongodb+srv://USE_YOUR_USER_NAME_HERE:USE_YOUR_PASSWORD_HERE@boardinfocluster-y6asy.gcp.mongodb.net/test")

# ---- Handles --------------------------------------------------------------
mydb = myclient[db_name]
myboard_col = mydb[board_cname]
filterdb_col = mydb[filterdb_cname]

# ---- Preview one document from each collection ----------------------------
print("-------- myboard_col.find_one --------")
pprint(myboard_col.find_one())
print("\n\n-------- filterdb_col.find_one --------")
pprint(filterdb_col.find_one())

-------- myboard_col.find_one --------
{'_id': ObjectId('5e97d86e31f3430c6e99d1a2'),
 'author': 'Fazle',
 'board_name': '369_330-E0900-000-369_A_D',
 'body_dim': {'cx': 1654,
              'cy': 788,
              'height': 23,
              'ori': 90,
              'width': 49,
              'x1': 1630,
              'x2': 1679,
              'y1': 777,
              'y2': 800},
 'date_created': '20200416_1200',
 'date_modified': '20200520_1142',
 'error': 'OK',
 'error_stack': ['OK'],
 'ocv_dims': 0,
 'package': 'CC',
 'path': '369_330-E0900-000-369_A_D/Tiles/4/369_330-E0900-000-369_A_D_RGB.jpg',
 'pins': {'dim_list': [], 'len': 0},
 'refDes ': '1:c100',
 'search_area': {'cx': 1655,
                 'cy': 787,
                 'x1': 1603,
                 'x2': 1708,
                 'y1': 761,
                 'y2': 814},
 'tile_files_info': {'channel_list': [0, 1, 2, 4, 5, 6, 7, 8],
                     'pgm_exist': True,
                     'rgb_exist': True,
                    

------------------------------------
### Extracting data for training

In [3]:
def _format_label_text(row):
    """Build the label-file text for one board record.

    Emits one ``x1,y1,x2,y2,label`` line per box, with coordinates made
    relative to the top-left corner of ``row['search_area']``. Labels are 1 for
    ``body_dim``, 2 for every entry of ``pins['dim_list']`` and 0 for every
    entry of ``pins['pinarray']['dim_list']`` (only when ``pinarray`` exists).
    """
    sa = row['search_area']
    pins = row['pins']

    def as_line(box, label):
        return "{},{},{},{},{}\n".format(box['x1'] - sa['x1'],
                                         box['y1'] - sa['y1'],
                                         box['x2'] - sa['x1'],
                                         box['y2'] - sa['y1'],
                                         label)

    lines = [as_line(row['body_dim'], 1)]
    if pins['len'] > 0:
        lines.extend(as_line(pin, 2) for pin in pins['dim_list'])
    if 'pinarray' in pins:
        lines.extend(as_line(pin, 0) for pin in pins['pinarray']['dim_list'])
    return "".join(lines)


def _write_label_file(txt_path, label_text):
    """Write ``label_text`` to ``txt_path``, replacing any existing file."""
    with open(txt_path, "w") as f:
        f.write(label_text)


def _crop_search_area(img_path, sa):
    """Read ``img_path`` with OpenCV and return the search-area crop.

    Raises:
        OSError: if ``cv2.imread`` returns ``None`` (missing or unreadable file).
    """
    img = cv2.imread(img_path)
    if img is None:
        raise OSError("cv2.imread could not read image: {}".format(img_path))
    # NOTE(review): abs() maps a negative coordinate to a positive offset instead of
    # clamping it to 0 -- kept unchanged; confirm this is the intended handling.
    return img[abs(sa['y1']): abs(sa['y2']), abs(sa['x1']): abs(sa['x2'])]


def _log_label_record(mylabel_col, row, img_path, txt_path):
    """Insert one log document describing an exported image/label pair."""
    now = datetime.datetime.now()  # single timestamp so date and hour cannot disagree
    mylabel_col.insert_one({
        "board_information_fkey": row['_id'],
        "last_modified": now.strftime("%x"),
        "last_modified_hour": now.strftime("%H"),
        "img_path": img_path,
        "txt_path": txt_path,
    })


def save_images_pinarray_and_pins(myboard_col, mylabel_col, myquery, output_dir, img_root, scheme=0):
    """Export search-area crops and their label files for every record matching ``myquery``.

    For each matching record the image is cropped to ``row['search_area']``, a
    label file is written next to it (see ``_format_label_text`` for the line
    format), and one document per exported image is inserted into
    ``mylabel_col``. All files go directly into ``output_dir`` and are named
    after the record's ``_id``.

    Schemes:
        1: crop the image at ``row['path']`` -> ``<id>.jpg`` + ``<id>.txt``.
        2: crop the image at ``row['path']`` and save planes 0, 1 and 2 of the
           crop separately -> ``<id>_<plane>.jpg`` + ``<id>_<plane>.txt``.
           (cv2.imread's default colour order is BGR, so plane 0 is presumably
           blue -- confirm before relying on the numbering.)
        3: for every channel in ``row['tile_files_info']['channel_list']`` crop
           the per-channel tile image, whose file name is ``row['path']`` with
           ``_RGB.jpg`` replaced by ``_<tile>_<channel>.jpg``
           -> ``<id>_<channel>.jpg`` + ``<id>_<channel>.txt``.

    Args:
        myboard_col: Collection the board records are read from.
        mylabel_col: Collection that receives one log document per exported image.
        myquery: Filter passed to ``count_documents`` and ``find``.
        output_dir: Directory receiving the images and label files.
        img_root: Root directory that ``row['path']`` is relative to.
        scheme: 1, 2 or 3 (see above). Any other value prints
            "ERROR: Invalid Scheme!" and returns without exporting anything.

    Returns:
        None.

    Raises:
        OSError: if a source image cannot be read.

    Author: Fazle
    Date: 20200615_1728
    Original description: To print the search area images for all channel as
    grayscale, so essentially all 9 channel if present. The object name will be
    postfix with the channel number.
    """
    # Cursor.count() is deprecated and removed in PyMongo 4; count on the collection instead.
    print("Analysing records: ", myboard_col.count_documents(myquery))

    # Validate before touching the filesystem so a bad scheme leaves no stray label file.
    if scheme not in (1, 2, 3):
        print("ERROR: Invalid Scheme!")
        return

    # Everything is written flat into output_dir (no per-board sub-folder).
    out_board_dir = os.path.normpath(os.path.join(output_dir, ""))
    os.makedirs(out_board_dir, exist_ok=True)
    img_root = os.path.normpath(img_root)

    for row in myboard_col.find(myquery):
        # NOTE(review): a record flagged with invalid body dimensions stops the whole
        # export, not just this record -- kept as found; confirm that skipping the
        # record is not what was intended. Checked before any file is created so no
        # empty label file is left behind.
        if FAULT_NAME['invalidbodydimensions'] in row['error_stack']:
            print("Error")
            return

        sa = row['search_area']
        out_stem = out_board_dir + "/" + str(row['_id'])
        label_text = _format_label_text(row)

        if scheme == 1:
            src_path = os.path.normpath(os.path.join(img_root, row['path']))
            crop = _crop_search_area(src_path, sa)

            img_out = out_stem + ".jpg"
            txt_out = out_stem + ".txt"
            cv2.imwrite(img_out, crop)
            _write_label_file(txt_out, label_text)
            _log_label_record(mylabel_col, row, img_out, txt_out)

        elif scheme == 2:
            src_path = os.path.normpath(os.path.join(img_root, row['path']))
            crop = _crop_search_area(src_path, sa)

            # One log document per plane, pointing at files that really exist on disk.
            for plane in range(3):
                img_out = "{}_{}.jpg".format(out_stem, plane)
                txt_out = "{}_{}.txt".format(out_stem, plane)
                cv2.imwrite(img_out, crop[:, :, plane])
                _write_label_file(txt_out, label_text)
                _log_label_record(mylabel_col, row, img_out, txt_out)

        else:  # scheme == 3
            tile_dir, rgb_name = os.path.split(os.path.normpath(row['path']))
            tile_no = os.path.split(tile_dir)[1]

            for channel in row['tile_files_info']['channel_list']:
                channel_name = rgb_name.replace("_RGB.jpg", "_{}_{}.jpg".format(tile_no, channel))
                src_path = os.path.normpath(
                    os.path.join(img_root, tile_dir, os.path.normpath(channel_name)))
                crop = _crop_search_area(src_path, sa)

                img_out = "{}_{}.jpg".format(out_stem, channel)
                txt_out = "{}_{}.txt".format(out_stem, channel)
                _write_label_file(txt_out, label_text)
                cv2.imwrite(img_out, crop)
                _log_label_record(mylabel_col, row, img_out, txt_out)
        
    

In [4]:
# Packages to export; one output sub-folder is created per entry.
package_list = [
    "SOIC",
]

# Source tiles and destination root for the exported dataset.
in_board_dir = "D:/FZ_WS/JyNB/Yolo_LD/Tf_Yolov3/LD_Files/Boards/Aoi_Boards_2020_E00"
OUTPUT_ROOT = "D:/FZ_WS/JyNB/TF_Research_Api_LD_2_0/research/object_detection/images/H_Dataset_08/H_Dataset_02_PinOnly"

for package in package_list:
    # Reset the log collection.
    # NOTE(review): this runs once per package, so with several packages only the
    # last package's log documents survive -- confirm that is intended.
    reload_mylabelcol(mydb, filterdb_col, filterdb_cname)

    # Query the package of this iteration rather than a fixed "SOIC".
    myquery = {"package": package}

    # Output folder for this package (created on first use).
    output_dir = createDirectory(path="{}/{}".format(OUTPUT_ROOT, package))
    outputdir = output_dir  # old spelling kept in case other cells refer to it

    # Save
    print("package: ", package)
    save_images_pinarray_and_pins(myboard_col, filterdb_col, myquery, output_dir, in_board_dir, scheme=2)
print("Completed!")

None
Creating D:/FZ_WS/JyNB/TF_Research_Api_LD_2_0/research/object_detection/images/H_Dataset_08/H_Dataset_02_PinOnly/SOIC
package:  SOIC
Analysing records:  588


  # Remove the CWD from sys.path while we load stuff.


Completed!
