Preliminary code design for processing Planktivore images into the shapes required for model ingest

In [17]:
import sys
import os
import json
import multiprocessing
import io
from glob import glob
import tarfile
import numpy as np
import xarray as xr
import pandas as pd
import datetime as dt
import cv2
from PIL import Image
import argparse
from pathlib import Path
import pdb

In [2]:
# Extend the module search path before the two imports below run.
# NOTE(review): cvtools and adjust_roi are presumably modules living in
# u:/planktivore, which is why the path is appended first — confirm.
sys.path.append('u:/planktivore')
import cvtools
from adjust_roi import pad_and_rescale

In [3]:
# Pixel (Bayer) pattern of the sensor.  The "tif" files hold the raw pixels from
# the sensor, so this conversion code has to be applied to get the actual color
# of each pixel.  It is handed to cv2.cvtColor when images are decoded from tar
# archives further down.
# Note: this notebook is set up to process only the high_mag_rois data; the
# low_mag_rois have about 10 times as many images.
bayer_pattern=cv2.COLOR_BAYER_RG2RGB

In [4]:
# Load the processing settings that are passed to the cvtools calls
# (import_image, convert_to_8bit, extract_features) later in the notebook.
# The context manager closes the file even if json.load raises.
with open('u:/planktivore/ptvr_proc_settings.json') as jf:
    settings = json.load(jf)

In [5]:
# Root directory of the data set; its immediate sub-directories are globbed below.
root_data_path='Y:/2025-04-14-LRAH-27/' # TODO: this could become a user-supplied input

In [6]:
# Collect every immediate sub-directory of the root data path
# (the trailing "/" in the pattern restricts the match to directories).
dirs = glob(f"{root_data_path}/*/")

In [7]:
# Show the directories that were found; glob returns them in no particular order.
dirs

['Y:/2025-04-14-LRAH-27\\2025-04-16-02-11-23.030364673\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-18-00-28-00.025366745\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-17-05-57-15.027529093\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-15-14-24-39.041034975\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-15-04-23-39.016101570\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-15-20-32-50.039352690\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-18-05-30-55.015875818\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-00-24-52.012176094\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-21-43-32.021994705\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-17-07-44-09.009073775\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-17-10-29.025996890\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-17-13-16-34.010973519\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-08-11-02.038287848\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-08-57-06.009720053\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-16-08-48.051363189\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-17-26-22.011155556\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-14-16-19-26.013482667\\

In [8]:
# Sub-folder (inside a data directory) that is searched for ROI files.
# Only the high-magnification camera ROIs are handled here.
mag='high_mag_cam_rois/'

In [9]:
# As an example, recursively list everything under the ROI folder of ONE of those
# directories (index 12 of the glob result, not the first entry).
# With recursive=True the '**' pattern also returns the folders themselves,
# so the list contains directory entries as well as files.
files=glob(dirs[12]+mag+'**',recursive=True)

In [10]:
# Show the matched paths (directory entries appear alongside the .tif files).
files

['Y:/2025-04-14-LRAH-27\\2025-04-16-08-11-02.038287848\\high_mag_cam_rois\\',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-08-11-02.038287848\\high_mag_cam_rois\\0000000000',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-08-11-02.038287848\\high_mag_cam_rois\\0000000000\\high_mag_cam-1744791128896892-132484588136-612-000-1656-1290-36-52.tif',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-08-11-02.038287848\\high_mag_cam_rois\\0000000000\\high_mag_cam-1744791198583543-202177868648-1309-000-1334-218-40-64.tif',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-08-11-02.038287848\\high_mag_cam_rois\\0000000000\\high_mag_cam-1744791263485338-267071410672-1958-000-1008-1530-40-40.tif',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-08-11-02.038287848\\high_mag_cam_rois\\0000000000\\high_mag_cam-1744791317474017-321066028760-2498-000-488-400-36-32.tif',
 'Y:/2025-04-14-LRAH-27\\2025-04-16-08-11-02.038287848\\high_mag_cam_rois\\0000000000\\high_mag_cam-1744791274977244-278570201592-2073-000-102-1792-64-48.tif',
 'Y:/2025-04-14-LRAH-27\\2025-04-1

In [11]:
# Number of matched paths; this includes directory entries, not only image files.
len(files)

136

In [13]:
# root_out_path: passed as abs_path to cvtools.extract_features (save_to_disk=True),
#                and later used as the --input_dir of the reformat step.
# reformatpath:  used as the --output_dir of the reformat step (pad_and_rescale).
root_out_path='u:/planktivore/output_test/'
reformatpath='u:/planktivore/reformat_data/'

In [None]:
# Walk every path in "files" and run feature extraction on each raw-sensor ROI:
#   * a loose .tif file is read from disk with cvtools.import_image
#   * a .tar archive is read member by member, decoded in memory and converted
#     with bayer_pattern
# Every other entry (directories, other extensions) is ignored.
# Results are written by cvtools.extract_features (save_to_disk=True) under
# root_out_path, using the image name without its extension as the file prefix.
for fpath in files:
    if fpath.endswith('.tif'):
        # some tif files are zero bytes long; there is nothing to decode in those
        if os.stat(fpath).st_size == 0:
            continue
        # split into folder and file name; os.path.split copes with either
        # slash type on Windows instead of assuming a backslash
        folder, ftif = os.path.split(fpath)
        # keep the trailing separator on the folder, as the original slicing did
        abpath = os.path.join(folder, '')
        # import the image
        img = cvtools.import_image(abpath, ftif, settings)
        # create the 8-bit color image
        img_c_8bit = cvtools.convert_to_8bit(img, settings)
        # file name less the extension
        fout = os.path.splitext(ftif)[0]
        # default is to output everything; there is no apparent option to
        # output just the raw color image
        output = cvtools.extract_features(img_c_8bit, img, settings, save_to_disk=True, abs_path=root_out_path+'/', file_prefix=fout)

    elif fpath.endswith('.tar'):
        # the context manager closes the archive even if decoding fails part-way
        with tarfile.open(fpath, 'r') as tobj:
            for member in tobj.getmembers():
                # Directory/link entries carry no image data and empty members
                # cannot be decoded.  Filtering on the member type replaces the
                # original "start at index 1", which skipped the first member
                # whatever it was.
                if not member.isfile() or member.size == 0:
                    continue
                # tar member names normally use '/', but accept '\\' as well
                base_name = member.name.replace('\\', '/').rsplit('/', 1)[-1]
                namefile = os.path.splitext(base_name)[0]  # drop extension
                fileobj = tobj.extractfile(member)  # file-like view of this member
                tiff_array = np.asarray(bytearray(fileobj.read()), dtype=np.uint8)  # raw file bytes
                img = cv2.imdecode(tiff_array, cv2.IMREAD_UNCHANGED)  # byte array -> image
                if img is None:
                    # member is not a decodable image; skip it
                    continue
                imgc = cv2.cvtColor(img, bayer_pattern)
                img_c_8bit = cvtools.convert_to_8bit(imgc, settings)
                # NOTE(review): the tif branch passes the image it also gave to
                # convert_to_8bit as the second argument; here the raw,
                # un-debayered "img" is passed rather than "imgc".  Left as in
                # the original — confirm against cvtools which one is intended.
                # file_prefix is this member's own name; the original passed
                # "fout", a leftover from the tif branch (or undefined).
                output = cvtools.extract_features(img_c_8bit, img, settings, save_to_disk=True, abs_path=root_out_path+'/', file_prefix=namefile)
            


In [22]:
# Build the same command-line interface a stand-alone script would expose and
# feed it the notebook's paths, so the values arrive as pathlib.Path objects.
parser = argparse.ArgumentParser()
for flag, description in (
    ("--input_dir", "Input Directory"),
    ("--output_dir", "Output Directory"),
):
    parser.add_argument(flag, type=Path, required=True, help=description)
args = parser.parse_args([
    '--input_dir', root_out_path,
    '--output_dir', reformatpath,
])

In [24]:

# Run the reformat step with the two Path arguments parsed above.
# NOTE(review): going by its name and arguments, this presumably pads and rescales
# the images found in input_dir and writes them to output_dir — confirm in adjust_roi.
pad_and_rescale(args.input_dir,args.output_dir)