In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import xml
import xml.etree.ElementTree as ET
import cv2
import math
import warnings
from skimage.transform import resize
import os
from scipy import ndimage
import shutil

In [None]:
# Check whether the notebook can access a GPU (an empty list means TensorFlow sees none)
tf.config.list_physical_devices("GPU")

##### Preprocessing for train data

In [None]:
# Every block folder follows '../Block_<n>_TN/Block_<n>_images_and_xml';
# only the block number changes from one variable to the next.
def _block_dir(block_number):
    """Return the relative path of the image/xml folder of one numbered block."""
    return '../Block_{0}_TN/Block_{0}_images_and_xml'.format(block_number)


# Train data locations
block_0101 = _block_dir(1)
block_0102 = _block_dir(2)
block_0203 = _block_dir(9)
block_0301 = _block_dir(13)

train_blocks = [block_0101, block_0102, block_0203, block_0301]

# valid data location
block_0204 = _block_dir(10)

valid_blocks = [block_0204]


# test data location
block_0103 = _block_dir(3)
block_0104 = _block_dir(4)
block_0105 = _block_dir(5)
block_0106 = _block_dir(6)
block_0201 = _block_dir(7)
block_0202 = _block_dir(8)
block_0205 = _block_dir(11)
block_0206 = _block_dir(12)
block_0302 = _block_dir(14)
block_0303 = _block_dir(15)
block_0304 = _block_dir(16)
block_0305 = _block_dir(17)
block_0306 = _block_dir(18)

test_blocks = [
    block_0103, block_0104, block_0105, block_0106,
    block_0201, block_0202, block_0205, block_0206,
    block_0302, block_0303, block_0304, block_0305, block_0306,
]

In [None]:
# Collect the sorted directory listing (images and xml files together) of each
# training block, in the same order as train_blocks.
all_train_contents = []
for item in train_blocks:
    block_contents = sorted(os.listdir(item))
    all_train_contents.append(block_contents)

In [None]:
# all_train_contents

In [None]:
# need to separate the image and the xml files - define a function
def separate_img_xml(content_list):
    """Split a directory listing into image names and xml names.

    Parameters
    ----------
    content_list : list of str
        File names, e.g. the output of ``os.listdir``.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(image_files, xml_files)``, each sorted alphabetically. Names ending in
        ``.jpeg`` are images and names ending in ``.xml`` are annotations.
        Anything else (hidden files, checkpoint folders, ...) is ignored so it
        cannot be mistaken for an annotation.
    """
    image_files = []
    xml_files = []
    for file in content_list:
        extension = file.split(".")[-1]
        if extension == 'jpeg':
            image_files.append(file)
        elif extension == 'xml':
            xml_files.append(file)
    # sort once at the end instead of after every append
    image_files.sort()
    xml_files.sort()
    return(image_files, xml_files)

In [None]:
# Split the listing of each training block into image names and xml names
# (all_train_contents holds one listing per entry of train_blocks).
(
    (img_files_11, xml_files_11),
    (img_files_12, xml_files_12),
    (img_files_23, xml_files_23),
    (img_files_31, xml_files_31),
) = [separate_img_xml(block_listing) for block_listing in all_train_contents[:4]]

In [None]:
# xml_files_31

In [None]:
# separate the annotated and not annotated images
def separate_annot_and_not(img_list, xml_list):
    """Partition images by whether an xml file with the same base name exists.

    The base name is the text before the first ``.`` of a file name.

    Returns
    -------
    tuple of (list of str, list of str)
        ``(annotated_images, not_annotated_images)``, each in the order of
        ``img_list``.
    """
    annotated_stems = {xml_name.split(".")[0] for xml_name in xml_list}

    def has_annotation(image_name):
        return image_name.split('.')[0] in annotated_stems

    annotated_images = [image_name for image_name in img_list if has_annotation(image_name)]
    not_annotated_images = [image_name for image_name in img_list if not has_annotation(image_name)]
    return(annotated_images, not_annotated_images)

In [None]:
# For each training block, split the images into those with and without an xml file.
(
    (annot_img_11, not_annot_img_11),
    (annot_img_12, not_annot_img_12),
    (annot_img_23, not_annot_img_23),
    (annot_img_31, not_annot_img_31),
) = [
    separate_annot_and_not(block_images, block_xmls)
    for block_images, block_xmls in (
        (img_files_11, xml_files_11),
        (img_files_12, xml_files_12),
        (img_files_23, xml_files_23),
        (img_files_31, xml_files_31),
    )
]

In [None]:
# separate the vertically and horizontally annotated images
def separate_annnot_vertical_horizontal(annotated_images, all_xml_files, n):
    """Split the annotated images and the xml files at position ``n``.

    The first ``n`` entries of each list form the vertical group and the
    remaining entries form the horizontal group.

    Returns
    -------
    tuple
        ``(vertical_images, vertical_xmls, horizontal_images, horizontal_xmls)``.
    """
    vertical_group = (annotated_images[:n], all_xml_files[:n])
    horizontal_group = (annotated_images[n:], all_xml_files[n:])
    return vertical_group + horizontal_group

In [None]:
# In every training block the first 6 entries of the (sorted) annotated images and
# xml files are taken as the vertical group, the rest as the horizontal group.
(
    (annot_vertical_images_11, annot_vertical_xmls_11, annot_horizontal_images_11, annot_horizontal_xmls_11),
    (annot_vertical_images_12, annot_vertical_xmls_12, annot_horizontal_images_12, annot_horizontal_xmls_12),
    (annot_vertical_images_23, annot_vertical_xmls_23, annot_horizontal_images_23, annot_horizontal_xmls_23),
    (annot_vertical_images_31, annot_vertical_xmls_31, annot_horizontal_images_31, annot_horizontal_xmls_31),
) = [
    separate_annnot_vertical_horizontal(block_annotated, block_xmls, 6)
    for block_annotated, block_xmls in (
        (annot_img_11, xml_files_11),
        (annot_img_12, xml_files_12),
        (annot_img_23, xml_files_23),
        (annot_img_31, xml_files_31),
    )
]

In [None]:
# check if the annotated horizontal and vertical file names match
def check_annot(annot_vertical_images, annot_vertical_xmls, annot_horizontal_images, annot_horizontal_xmls):
    """Print how many image/xml pairs share a base name, per orientation.

    Two lines are printed, first for the vertical lists and then for the
    horizontal lists. Each line shows the number of positions at which the image
    and the xml file have the same base name (text before the first ``.``),
    followed by the number of images. The two numbers are equal when every image
    is paired with its own xml file.

    Returns
    -------
    None
        The result is only printed.
    """
    def _count_matching(images, xmls):
        # compare the k-th image with the k-th xml file
        return sum(
            1
            for image_name, xml_name in zip(images, xmls)
            if image_name.split(".")[0] == xml_name.split(".")[0]
        )

    # are the vertical image and xml files matching
    print(_count_matching(annot_vertical_images, annot_vertical_xmls), len(annot_vertical_images))
    # are the horizontally annotated images and the xml files the same?
    print(_count_matching(annot_horizontal_images, annot_horizontal_xmls), len(annot_horizontal_images))

In [None]:
# check_annot prints its own two summary lines and returns nothing, so it is
# called directly; wrapping it in print() would add a stray "None" line per block.
check_annot(annot_vertical_images_11, annot_vertical_xmls_11, annot_horizontal_images_11, annot_horizontal_xmls_11)
check_annot(annot_vertical_images_12, annot_vertical_xmls_12, annot_horizontal_images_12, annot_horizontal_xmls_12)
check_annot(annot_vertical_images_23, annot_vertical_xmls_23, annot_horizontal_images_23, annot_horizontal_xmls_23)
check_annot(annot_vertical_images_31, annot_vertical_xmls_31, annot_horizontal_images_31, annot_horizontal_xmls_31)

In [None]:
# now store the images as npy files?

# define a function to store the images as npy files - need two functions for vertical and horizontal separately - because otherwise all images are read as horizontal images.
def store_images_as_np_arrays_vertical(img_old_path, img_name, img_store_path, angle=270):
    """Read an image, rotate it, display it and save it as a ``.npy`` array.

    Parameters
    ----------
    img_old_path : str
        Folder containing the source image.
    img_name : str
        File name of the source image.
    img_store_path : str
        Folder for the ``.npy`` file; it is created when it does not exist.
    angle : float, optional
        Rotation passed to ``scipy.ndimage.rotate`` (degrees). Defaults to 270.

    Returns
    -------
    tuple
        Shape of the rotated array that was saved.
    """
    # join the path
    image_path = os.path.join(img_old_path, img_name)
    # read the image and rotate it
    read_image = plt.imread(image_path)
    read_image = ndimage.rotate(read_image, angle)
    image_size = read_image.shape
    # show the image
    plt.imshow(read_image)
    plt.show()
    # np.save does not create missing folders, so make sure the target exists
    if img_store_path:
        os.makedirs(img_store_path, exist_ok=True)
    # save under the text before the first '.' of the image name
    np.save(os.path.join(img_store_path, img_name.split(".")[0] + '.npy'), read_image)
    return(image_size)

In [None]:
# Show the names of the vertically annotated images selected for block 11
annot_vertical_images_11

In [None]:
# Save every vertically annotated training image as a rotated .npy array and keep
# the shapes of the stored arrays, one list per block.
store_path = "Images_and_dense_maps_npy_train/vertical_annotated_images"

# blk 11
im_shapes_annot_vertical_11 = [
    store_images_as_np_arrays_vertical(block_0101, item, store_path)
    for item in annot_vertical_images_11
]
print("Block 11 done!")

# blk 12
im_shapes_annot_vertical_12 = [
    store_images_as_np_arrays_vertical(block_0102, item, store_path)
    for item in annot_vertical_images_12
]
print("Block 12 done!")

# blk 23
im_shapes_annot_vertical_23 = [
    store_images_as_np_arrays_vertical(block_0203, item, store_path)
    for item in annot_vertical_images_23
]
print("Block 23 done!")

# blk 31
im_shapes_annot_vertical_31 = [
    store_images_as_np_arrays_vertical(block_0301, item, store_path)
    for item in annot_vertical_images_31
]
print("Block 31 done!")

In [None]:
# Print the stored array shapes of the vertically annotated images, one line per block.
for block_shapes in (
    im_shapes_annot_vertical_11,
    im_shapes_annot_vertical_12,
    im_shapes_annot_vertical_23,
    im_shapes_annot_vertical_31,
):
    print(block_shapes)

In [None]:
# Save every not-annotated training image as a rotated .npy array and keep the
# shapes of the stored arrays, one list per block.
store_path = "Images_and_dense_maps_npy_train/vertical_not_annotated_images"

# blk 11
im_shapes_notannot_vertical_11 = [
    store_images_as_np_arrays_vertical(block_0101, item, store_path)
    for item in not_annot_img_11
]
print("Block 11 done!")

# blk 12
im_shapes_notannot_vertical_12 = [
    store_images_as_np_arrays_vertical(block_0102, item, store_path)
    for item in not_annot_img_12
]
print("Block 12 done!")

# blk 23
im_shapes_notannot_vertical_23 = [
    store_images_as_np_arrays_vertical(block_0203, item, store_path)
    for item in not_annot_img_23
]
print("Block 23 done!")

# blk 31 - the first image needs a different rotation, so it is left out here
im_shapes_notannot_vertical_31 = [
    store_images_as_np_arrays_vertical(block_0301, item, store_path)
    for item in not_annot_img_31[1:]
]
print("Block 31 done!")

In [None]:
# save the outlier image in block 31 - the first vertical na image
def store_images_as_np_arrays_vertical_outlier(img_old_path, img_name, img_store_path):
    """Read an image, rotate it by 90 degrees, display it and save it as ``.npy``.

    Parameters
    ----------
    img_old_path : str
        Folder containing the source image.
    img_name : str
        File name of the source image.
    img_store_path : str
        Folder for the ``.npy`` file; it is created when it does not exist.

    Returns
    -------
    tuple
        Shape of the rotated array that was saved.
    """
    # join the path
    image_path = os.path.join(img_old_path, img_name)
    # read the image and rotate it
    read_image = plt.imread(image_path)
    read_image = ndimage.rotate(read_image, 90)
    image_size = read_image.shape
    # show the image
    plt.imshow(read_image)
    plt.show()
    # np.save does not create missing folders, so make sure the target exists
    if img_store_path:
        os.makedirs(img_store_path, exist_ok=True)
    # save under the text before the first '.' of the image name
    np.save(os.path.join(img_store_path, img_name.split(".")[0] + '.npy'), read_image)
    return(image_size)

In [None]:
# The first not-annotated image of block 31 was left out of the bulk loop; store it
# with the outlier helper. store_path must still point at the vertical
# not-annotated folder when this cell runs.
img_shape_na_0_31 = store_images_as_np_arrays_vertical_outlier(
    img_old_path=block_0301,
    img_name=not_annot_img_31[0],
    img_store_path=store_path,
)

In [None]:
# define a function to store horizontal images
def store_images_as_np_arrays_horizontal(img_old_path, img_name, img_store_path):
    """Read an image, display it and save it unchanged as a ``.npy`` array.

    Parameters
    ----------
    img_old_path : str
        Folder containing the source image.
    img_name : str
        File name of the source image.
    img_store_path : str
        Folder for the ``.npy`` file; it is created when it does not exist.

    Returns
    -------
    tuple
        Shape of the array that was saved.
    """
    # join the path
    image_path = os.path.join(img_old_path, img_name)
    # read the image (no rotation for horizontal images)
    read_image = plt.imread(image_path)
    image_size = read_image.shape
    # show the image
    plt.imshow(read_image)
    plt.show()
    # np.save does not create missing folders, so make sure the target exists
    if img_store_path:
        os.makedirs(img_store_path, exist_ok=True)
    # save under the text before the first '.' of the image name
    np.save(os.path.join(img_store_path, img_name.split(".")[0] + '.npy'), read_image)
    return(image_size)

In [None]:
# Save every horizontally annotated training image as a .npy array (no rotation)
# and keep the shapes of the stored arrays, one list per block.
store_path = "Images_and_dense_maps_npy_train/horizontal_annotated_images"

# blk 11
# NOTE(review): an earlier remark says the first horizontal image of this block
# needs to be rotated, but no rotation is applied here - confirm.
im_shapes_annot_horizontal_11 = [
    store_images_as_np_arrays_horizontal(block_0101, item, store_path)
    for item in annot_horizontal_images_11
]
print("Block 11 done!")

# blk 12
im_shapes_annot_horizontal_12 = [
    store_images_as_np_arrays_horizontal(block_0102, item, store_path)
    for item in annot_horizontal_images_12
]
print("Block 12 done!")

# blk 23
im_shapes_annot_horizontal_23 = [
    store_images_as_np_arrays_horizontal(block_0203, item, store_path)
    for item in annot_horizontal_images_23
]
print("Block 23 done!")

# blk 31
im_shapes_annot_horizontal_31 = [
    store_images_as_np_arrays_horizontal(block_0301, item, store_path)
    for item in annot_horizontal_images_31
]
print("Block 31 done!")

##### Create density maps

In [None]:
# Copy the xml files of every training block into a single folder, so the
# density-map step can read all annotations from one place.
# train_blocks = [block_0101, block_0102, block_0203, block_0301]
new_store_path = 'train_xml_files'
# shutil.copy treats a missing destination as a file name; without this folder
# every xml would be written over one file called 'train_xml_files'.
os.makedirs(new_store_path, exist_ok=True)

for block_dir, block_xml_files in (
    (block_0101, xml_files_11),  # blk 11
    (block_0102, xml_files_12),  # blk 12
    (block_0203, xml_files_23),  # blk 23
    (block_0301, xml_files_31),  # blk 31
):
    for file in block_xml_files:
        joined_path = os.path.join(block_dir, file)
        # store the file in new path
        shutil.copy(joined_path, new_store_path)

In [None]:
# for vertically annotated images
def get_density_maps_vertical(file_name, image_path, xml_path, save_density_path):
    """Build, display and save the density map of one annotated image.

    The bounding boxes are read from ``<xml_path>/<file_name>.xml``. The centre of
    each box is added to a dot map with the height and width of the image, and the
    dot map is convolved with a 50x50 Gaussian kernel (sigma 5) to give the density
    map, which is saved as ``<save_density_path>/<file_name>_density_map.npy``.
    Four figures are shown on the way: the boxes, the box centres, the dot map and
    the density map.

    Parameters
    ----------
    file_name : str
        Base name (no extension) shared by the ``.npy`` image and the ``.xml`` file.
    image_path : str
        Folder holding ``<file_name>.npy``.
    xml_path : str
        Folder holding ``<file_name>.xml``.
    save_density_path : str
        Existing folder in which the density map is saved.

    Returns
    -------
    tuple
        ``(file_name, image_shape, no_of_tassels, img_sum, density_map)`` where
        ``no_of_tassels`` is the number of boxes and ``img_sum`` is the sum of the
        density map.

    Raises
    ------
    ValueError
        If the number of coordinates found in the xml file is not a multiple of 4.
    """
    xml_file_path = os.path.join(xml_path, file_name + '.xml')

    # Every element three levels below the root is read as one integer coordinate.
    # NOTE(review): assumes only the box corners sit at that depth and that they
    # are ordered x1, y1, x2, y2 - confirm against the annotation format.
    root = ET.parse(xml_file_path).getroot()
    coords = [int(leaf.text) for child in root for group in child for leaf in group]
    if len(coords) % 4 != 0:
        raise ValueError(
            f"{xml_file_path}: found {len(coords)} coordinates, expected a multiple of 4"
        )

    # chunk the points into sets of 4 - these are the coordinates of the bounding boxes
    points_tupples = [coords[k:k + 4] for k in range(0, len(coords), 4)]

    # compute the number of tassels in each image
    no_of_tassels = len(points_tupples)

    # read the image; its own shape (not a fixed size) bounds the coordinates below
    imge_file_path = os.path.join(image_path, file_name + '.npy')
    read_image = np.load(imge_file_path)
    read_image_shape = read_image.shape
    img_height, img_width = read_image_shape[0], read_image_shape[1]

    # compute the mid coordinates and clip them into the image, so that every
    # centre is a valid pixel index (including x == width, y == height and negatives)
    boxes = np.array(points_tupples, dtype=int).reshape(-1, 4)
    mid_x = np.clip(np.round(0.5 * (boxes[:, 0] + boxes[:, 2])).astype(int), 0, img_width - 1)
    mid_y = np.clip(np.round(0.5 * (boxes[:, 1] + boxes[:, 3])).astype(int), 0, img_height - 1)
    coords_list = [(int(x), int(y)) for x, y in zip(mid_x, mid_y)]

    # plot the bounding boxes on the image (falls back to the plain image when
    # there are no boxes)
    annotated_image = read_image
    for x1, y1, x2, y2 in points_tupples:
        annotated_image = cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color = (255,0,0), thickness = 2)
    plt.imshow(annotated_image)
    plt.show()

    # plot the mid points on a fresh copy of the image, without the boxes
    image_with_mids = np.load(imge_file_path)
    for centre in coords_list:
        image_with_mids = cv2.circle(image_with_mids, centre, radius=5, color=(255, 0, 0), thickness=-1)
    plt.imshow(image_with_mids)
    plt.show()

    # dot map: one unit per box centre; += keeps coinciding centres from being
    # counted only once
    np_image = np.zeros((img_height, img_width))
    for x, y in coords_list:
        np_image[y, x] += 1
    plt.imshow(np_image, cmap = "Greys")
    plt.show()

    # now define the kernel (outer product of the 1D Gaussian) and run the convolution
    one_d_kernel = cv2.getGaussianKernel(50,5)
    two_d_kernel = np.multiply(one_d_kernel.T, one_d_kernel)
    convolution = ndimage.convolve(np_image, two_d_kernel)

    # plot the density map
    plt.imshow(convolution, cmap = "Greys")
    plt.show()

    # get the sums of the images
    img_sum = np.sum(convolution)

    # save the density map
    np.save(os.path.join(save_density_path, file_name + '_density_map.npy'), convolution)

    return(file_name, read_image_shape, no_of_tassels, img_sum, convolution)

In [None]:
# Inputs of the vertical density-map step; the density maps are saved in the same
# folder as the images they belong to.
xml_path = 'train_xml_files'
image_path = save_density_path = "Images_and_dense_maps_npy_train/vertical_annotated_images/"

In [None]:
# Base names of the stored vertical images, sorted; entries whose base name ends
# in 'map' (already saved density maps) are left out.
file_names_v = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path))
    if not stem.endswith('map')
)

In [None]:
# file_names_v

In [None]:
%%time
# Build the density map of every vertically annotated image, then split the
# returned tuples into one list per field.
results_va = [
    get_density_maps_vertical(file_name, image_path, xml_path, save_density_path)
    for file_name in file_names_v
]
names_all_va = [result[0] for result in results_va]
shapes_all_va = [result[1] for result in results_va]
tasselCounts_all_va = [result[2] for result in results_va]
tasselDensities_all_va = [result[3] for result in results_va]
dense_maps_all_va = [result[4] for result in results_va]

In [None]:
# Sanity check: fraction of images whose density-map sum rounds to the box count
np.mean(np.asarray(tasselCounts_all_va) == np.round(tasselDensities_all_va))

In [None]:
# Table of true counts: one row per vertically annotated image
True_tasselcounts_df_vertical_annot = pd.DataFrame(
    {'name': names_all_va, 'true_count': tasselCounts_all_va}
)

In [None]:
# (rows, columns) of the vertical true-count table
True_tasselcounts_df_vertical_annot.shape

In [None]:
# Preview the first rows of the vertical true-count table
True_tasselcounts_df_vertical_annot.head()

In [None]:
# save the true counts
# to_csv does not create missing folders, so make sure the output folder exists
os.makedirs("True_tassel_counts/all_data", exist_ok=True)
True_tasselcounts_df_vertical_annot.to_csv("True_tassel_counts/all_data/vertical_annotated_true_counts.csv", index = False)

In [None]:
# create density maps for the horizontally annotated images
def get_density_maps_horizontal(file_name, image_path, xml_path, save_density_path):
    """Build, display and save the density map of one annotated image.

    The bounding boxes are read from ``<xml_path>/<file_name>.xml``. The centre of
    each box is added to a dot map with the height and width of the image, and the
    dot map is convolved with a 50x50 Gaussian kernel (sigma 5) to give the density
    map, which is saved as ``<save_density_path>/<file_name>_density_map.npy``.
    Four figures are shown on the way: the boxes, the box centres, the dot map and
    the density map.

    Parameters
    ----------
    file_name : str
        Base name (no extension) shared by the ``.npy`` image and the ``.xml`` file.
    image_path : str
        Folder holding ``<file_name>.npy``.
    xml_path : str
        Folder holding ``<file_name>.xml``.
    save_density_path : str
        Existing folder in which the density map is saved.

    Returns
    -------
    tuple
        ``(file_name, image_shape, no_of_tassels, img_sum, density_map)`` where
        ``no_of_tassels`` is the number of boxes and ``img_sum`` is the sum of the
        density map.

    Raises
    ------
    ValueError
        If the number of coordinates found in the xml file is not a multiple of 4.
    """
    xml_file_path = os.path.join(xml_path, file_name + '.xml')

    # Every element three levels below the root is read as one integer coordinate.
    # NOTE(review): assumes only the box corners sit at that depth and that they
    # are ordered x1, y1, x2, y2 - confirm against the annotation format.
    root = ET.parse(xml_file_path).getroot()
    coords = [int(leaf.text) for child in root for group in child for leaf in group]
    if len(coords) % 4 != 0:
        raise ValueError(
            f"{xml_file_path}: found {len(coords)} coordinates, expected a multiple of 4"
        )

    # chunk the points into sets of 4 - these are the coordinates of the bounding boxes
    points_tupples = [coords[k:k + 4] for k in range(0, len(coords), 4)]

    # compute the number of tassels in each image
    no_of_tassels = len(points_tupples)

    # read the image; its own shape (not a fixed size) bounds the coordinates below
    imge_file_path = os.path.join(image_path, file_name + '.npy')
    read_image = np.load(imge_file_path)
    read_image_shape = read_image.shape
    img_height, img_width = read_image_shape[0], read_image_shape[1]

    # compute the mid coordinates and clip them into the image, so that every
    # centre is a valid pixel index (including x == width, y == height and negatives)
    boxes = np.array(points_tupples, dtype=int).reshape(-1, 4)
    mid_x = np.clip(np.round(0.5 * (boxes[:, 0] + boxes[:, 2])).astype(int), 0, img_width - 1)
    mid_y = np.clip(np.round(0.5 * (boxes[:, 1] + boxes[:, 3])).astype(int), 0, img_height - 1)
    coords_list = [(int(x), int(y)) for x, y in zip(mid_x, mid_y)]

    # plot the bounding boxes on the image (falls back to the plain image when
    # there are no boxes)
    annotated_image = read_image
    for x1, y1, x2, y2 in points_tupples:
        annotated_image = cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color = (255,0,0), thickness = 2)
    plt.imshow(annotated_image)
    plt.show()

    # plot the mid points on a fresh copy of the image, without the boxes
    image_with_mids = np.load(imge_file_path)
    for centre in coords_list:
        image_with_mids = cv2.circle(image_with_mids, centre, radius=5, color=(255, 0, 0), thickness=-1)
    plt.imshow(image_with_mids)
    plt.show()

    # dot map: one unit per box centre; += keeps coinciding centres from being
    # counted only once
    np_image = np.zeros((img_height, img_width))
    for x, y in coords_list:
        np_image[y, x] += 1
    plt.imshow(np_image, cmap = "Greys")
    plt.show()

    # now define the kernel (outer product of the 1D Gaussian) and run the convolution
    one_d_kernel = cv2.getGaussianKernel(50,5)
    two_d_kernel = np.multiply(one_d_kernel.T, one_d_kernel)
    convolution = ndimage.convolve(np_image, two_d_kernel)

    # plot the density map
    plt.imshow(convolution, cmap = "Greys")
    plt.show()

    # get the sums of the images
    img_sum = np.sum(convolution)

    # save the density map
    np.save(os.path.join(save_density_path, file_name + '_density_map.npy'), convolution)

    return(file_name, read_image_shape, no_of_tassels, img_sum, convolution)

In [None]:
# Inputs of the horizontal density-map step; the density maps are saved in the
# same folder as the images they belong to.
xml_path_h = 'train_xml_files'
image_path_h = save_density_path_h = "Images_and_dense_maps_npy_train/horizontal_annotated_images/"

In [None]:
# Base names of the stored horizontal images, sorted; entries whose base name
# ends in 'map' (already saved density maps) are left out.
file_names_h = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path_h))
    if not stem.endswith('map')
)

In [None]:
%%time
# Build the density map of every horizontally annotated image, then split the
# returned tuples into one list per field.
results_h = [
    get_density_maps_horizontal(file_h, image_path_h, xml_path_h, save_density_path_h)
    for file_h in file_names_h
]
names_all_h = [result[0] for result in results_h]
shapes_all_h = [result[1] for result in results_h]
tasselCounts_all_h = [result[2] for result in results_h]
tasselDensities_all_h = [result[3] for result in results_h]
dense_maps_all_h = [result[4] for result in results_h]

In [None]:
# Sanity check: fraction of images whose density-map sum rounds to the box count
np.mean(np.asarray(tasselCounts_all_h) == np.round(tasselDensities_all_h))

In [None]:
# Table of true counts: one row per horizontally annotated image
True_tasselcounts_df_horizontal_annot = pd.DataFrame(
    {'name': names_all_h, 'true_count': tasselCounts_all_h}
)

In [None]:
# (rows, columns) of the horizontal true-count table
True_tasselcounts_df_horizontal_annot.shape

In [None]:
# Preview the first rows of the horizontal true-count table
True_tasselcounts_df_horizontal_annot.head()

In [None]:
# save the true counts
# to_csv does not create missing folders, so make sure the output folder exists
os.makedirs("True_tassel_counts/all_data", exist_ok=True)
True_tasselcounts_df_horizontal_annot.to_csv("True_tassel_counts/all_data/horizontal_annotated_true_counts.csv", index = False)

In [None]:
# for images without annotations
def get_density_maps_not_annotated(file_name, image_path, save_density_path):
    """Display one stored image and save an all-zero density map for it.

    The image is loaded from ``<image_path>/<file_name>.npy`` and shown. A zero
    array with the height and width of the image is saved as
    ``<save_density_path>/<file_name>_density_map.npy``.

    Returns
    -------
    tuple
        ``(file_name, image_shape, img_sum, density_map)`` where ``img_sum`` is the
        sum of the (all-zero) density map.
    """
    # load and show the stored image
    source_file = os.path.join(image_path, file_name + '.npy')
    read_image = np.load(source_file)
    plt.imshow(read_image)
    plt.show()

    # no annotations, so the density map is zero everywhere
    read_image_shape = read_image.shape
    n_rows, n_cols = read_image_shape[0], read_image_shape[1]
    empty_map = np.zeros((n_rows, n_cols))

    # save the density map
    target_file = save_density_path + '/' + file_name + '_density_map.npy'
    np.save(target_file, empty_map)

    return(file_name, read_image_shape, np.sum(empty_map), empty_map)

In [None]:
# The not-annotated density maps are saved in the same folder as their images.
image_path_vn = save_density_path_vn = 'Images_and_dense_maps_npy_train/vertical_not_annotated_images/'

In [None]:
# Base names of the stored not-annotated images, sorted; entries whose base name
# ends in 'map' (already saved density maps) are left out.
file_names_vn = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path_vn))
    if not stem.endswith('map')
)

In [None]:
# Number of not-annotated images found
len(file_names_vn)

In [None]:
%%time
# Save an all-zero density map for every not-annotated image, then split the
# returned tuples into one list per field.
results_vn = [
    get_density_maps_not_annotated(file_vn, image_path_vn, save_density_path_vn)
    for file_vn in file_names_vn
]
names_all_vn = [result[0] for result in results_vn]
shapes_all_vn = [result[1] for result in results_vn]
tasselDensities_all_vn = [result[2] for result in results_vn]
dense_maps_all_vn = [result[3] for result in results_vn]
    

In [None]:
# Table of true counts for the not-annotated images; the count is the sum of the
# density map of each image.
Vertical_not_annotated_df = pd.DataFrame(
    {'name': names_all_vn, 'true_count': tasselDensities_all_vn}
)

In [None]:
# Preview the first rows of the not-annotated true-count table
Vertical_not_annotated_df.head()

In [None]:
# (rows, columns) of the not-annotated true-count table
Vertical_not_annotated_df.shape

In [None]:
# save the true counts
# to_csv does not create missing folders, so make sure the output folder exists
os.makedirs("True_tassel_counts/all_data", exist_ok=True)
Vertical_not_annotated_df.to_csv("True_tassel_counts/all_data/vertical_not_annotated_true_counts.csv", index = False)

In [None]:
# Folders holding the stored images together with their density maps; their
# contents are gathered into a single location next.
_npy_train_root = 'Images_and_dense_maps_npy_train'

va_images_dense = _npy_train_root + '/vertical_annotated_images/'
vn_images_dense = _npy_train_root + '/vertical_not_annotated_images/'
h_images_dense = _npy_train_root + '/horizontal_annotated_images/'

In [None]:
# Copy the three source folders into one merged folder; dirs_exist_ok lets the
# later copies add to the folder created by the first one.
merged_train_dir = 'Preprocessed_train_data/all_img_density_files/'
for source_dir in (va_images_dense, vn_images_dense):
    shutil.copytree(source_dir, merged_train_dir, dirs_exist_ok=True)
# kept as the last expression so the cell still shows the destination path
shutil.copytree(h_images_dense, merged_train_dir, dirs_exist_ok=True)

##### Create sub-windows and counts

In [None]:
def create_subwindows_and_counts(image, numpy_folder, stride = 8, kernel_size = 32):
    """Slide a square window over one image and its density map.

    Parameters
    ----------
    image : str
        File name of the image; everything before the first "." is used as
        the base name, so "<base>.npy" and "<base>_density_map.npy" are loaded.
    numpy_folder : str
        Folder that holds both numpy files.
    stride : int
        Step (in pixels) between consecutive window origins, in both directions.
    kernel_size : int
        Side length of the window that is cut out at every origin.

    Returns
    -------
    tuple
        (list of sub-window images, list of density sums, image file name).
        Both lists are ordered row by row (outer loop over rows, inner over
        columns). Windows that reach past the image border are clipped by the
        slicing; every sub-window is then resized to (32, 32, 3).
    """
    base_name = image.split(".")[0]
    im_file = f"{base_name}.npy"

    # image first, then its density map
    pixels = np.load(f"{numpy_folder}/{im_file}")
    density_map = np.load(f"{numpy_folder}/{base_name}_density_map.npy")

    n_rows, n_cols = pixels.shape[0], pixels.shape[1]

    # top-left corner of every window, in row-major order
    corners = [(top, left)
               for top in range(0, n_rows, stride)
               for left in range(0, n_cols, stride)]

    windows = []
    counts = []
    for top, left in corners:
        rows = slice(top, top + kernel_size)
        cols = slice(left, left + kernel_size)
        patch = pixels[rows, cols, :]
        # the count of a window is the sum of the density map below it
        counts.append(np.sum(density_map[rows, cols]))
        windows.append(resize(patch, (32, 32,3)))

    return(windows, counts, im_file)

In [None]:
# For the train data, it is unnecessary to save the individual sub count and window files, let's save them with the stack

In [None]:
# let's do this for a sample and then in the loop
train_files_path = "Preprocessed_train_data/all_img_density_files/"

In [None]:
train_im_and_map_contents = os.listdir(train_files_path)

# sort these - ALWAYS sort these as the order is always messed up on HCC
train_im_and_map_contents.sort()

In [None]:
len(train_im_and_map_contents)

In [None]:
# get only the names of the image (npy files)
train_im_names = [item for item in train_im_and_map_contents if item.split(".")[0][-3:] != 'map']

In [None]:
# how many? should be 32
len(train_im_names)

In [None]:
%%time
# create the subwindows for all train data
catch_all_image_subwindows_train = []
catch_all_dense_subwindows_train = []
catch_train_names = []
for image in train_im_names:
    train_ims, train_maps, train_names = create_subwindows_and_counts(image, train_files_path, stride = 8, kernel_size = 32)
    catch_all_image_subwindows_train.append(train_ims)
    catch_all_dense_subwindows_train.append(train_maps)
    catch_train_names.append(train_names)

In [None]:
# sanity check names: both operands are plain Python lists, so == yields a
# single bool; the result is 1.0 when all names match in order, else 0.0
np.mean(train_im_names == catch_train_names)

##### Preprocessing for validation data

In [None]:
# list all the image and the xml files for the validation data (block_0204).
# os.listdir returns entries in arbitrary order, so sort them exactly like the
# train listings before they are split into images / xml files.
all_valid_contents = sorted(os.listdir(block_0204))

In [None]:
# separate image and xml files
img_files_24, xml_files_24 = separate_img_xml(all_valid_contents)

In [None]:
# separate annotated and not annotated images
annot_img_24, not_annot_img_24 = separate_annot_and_not(img_files_24, xml_files_24)

In [None]:
# separate vertical and horizontal annotated images
annot_vertical_images_24, annot_vertical_xmls_24, annot_horizontal_images_24, annot_horizontal_xmls_24 = separate_annnot_vertical_horizontal(annot_img_24, 
                                                                                                                                             xml_files_24, 8)

In [None]:
# check shapes
print(check_annot(annot_vertical_images_24, annot_vertical_xmls_24, annot_horizontal_images_24, annot_horizontal_xmls_24))

In [None]:
# store the vertically annotated images - validation data
store_path = "Images_and_dense_maps_npy_valid/vertical_annotated_images"

# blk 24
im_shapes_annot_vertical_24 = []
for item in annot_vertical_images_24:
    img_shape = store_images_as_np_arrays_vertical(block_0204, item, store_path)
    im_shapes_annot_vertical_24.append(img_shape)
print("Block 24 done!")

In [None]:
print(im_shapes_annot_vertical_24)

In [None]:
# store the vertically not-annotated images
store_path = "Images_and_dense_maps_npy_valid/vertical_not_annotated_images"

# blk 24
im_shapes_notannot_vertical_24 = []
for item in not_annot_img_24:
    img_shape = store_images_as_np_arrays_vertical(block_0204, item, store_path)
    im_shapes_notannot_vertical_24.append(img_shape)
print("Block 24 done!")

In [None]:
print(im_shapes_notannot_vertical_24)

In [None]:
# store the horizontally annotated images
store_path = "Images_and_dense_maps_npy_valid/horizontal_annotated_images"

# blk 11 - the first horizontal image has a problem, needs to be rotated
im_shapes_annot_horizontal_24 = []
for item in annot_horizontal_images_24:
    img_shape = store_images_as_np_arrays_horizontal(block_0204, item, store_path)
    im_shapes_annot_horizontal_24.append(img_shape)
print("Block 24 done!")

In [None]:
# Get the xml file paths in a single location
new_store_path = 'valid_xml_files'
# make sure the destination is a directory: if it does not exist, shutil.copy
# would write every xml to one plain file called 'valid_xml_files' instead
os.makedirs(new_store_path, exist_ok=True)
# blk 24 (block_0204)
for file in xml_files_24:
    joined_path = os.path.join(block_0204, file)
    # store the file in new path
    shutil.copy(joined_path, new_store_path)

In [None]:
# get the outputs from this function
image_path = "Images_and_dense_maps_npy_valid/vertical_annotated_images/"
xml_path = 'valid_xml_files'
save_density_path = image_path

In [None]:
file_names_v = [file.split(".")[0] for file in os.listdir(image_path) if file.split(".")[0][-3:] != 'map']
file_names_v.sort()

In [None]:
# file_names_v

In [None]:
%%time
names_all_va_v = []
shapes_all_va_v = []
tasselCounts_all_va_v = []
tasselDensities_all_va_v = []
dense_maps_all_va_v = []
for file_name in file_names_v:
    name_va, shape_va, tassels_va, tassel_dense_va, dense_map_va = get_density_maps_vertical(file_name, image_path, xml_path, save_density_path)
    names_all_va_v.append(name_va)
    shapes_all_va_v.append(shape_va)
    tasselCounts_all_va_v.append(tassels_va)
    tasselDensities_all_va_v.append(tassel_dense_va)
    dense_maps_all_va_v.append(dense_map_va)

In [None]:
# a few sanity checks: fraction of validation images whose tassel count equals
# the rounded density sum. Use the validation lists (suffix _v) filled by the
# loop above, not the lists left over from the training section.
np.mean(tasselCounts_all_va_v == np.round(tasselDensities_all_va_v))

In [None]:
# create a dataframe of the true counts
True_tasselcounts_df_vertical_annot_valid = pd.DataFrame(zip(names_all_va_v, tasselCounts_all_va_v), columns = ['name', 'true_count'])

In [None]:
True_tasselcounts_df_vertical_annot_valid.shape

In [None]:
True_tasselcounts_df_vertical_annot_valid.head()

In [None]:
# save the true counts
True_tasselcounts_df_vertical_annot_valid.to_csv("True_tassel_counts/all_data/vertical_annotated_true_counts_valid.csv", index = False)

In [None]:
# get the outputs from this function
image_path_h = "Images_and_dense_maps_npy_valid/horizontal_annotated_images/"
xml_path_h = 'valid_xml_files'
save_density_path_h = image_path_h

In [None]:
file_names_h = [file.split(".")[0] for file in os.listdir(image_path_h) if file.split(".")[0][-3:] != 'map']
file_names_h.sort()

In [None]:
# file_names_h

In [None]:
%%time
names_all_h_v = []
shapes_all_h_v = []
tasselCounts_all_h_v = []
tasselDensities_all_h_v = []
dense_maps_all_h_v = []
for file_h in file_names_h:
    name_h, shape_h, tassels_h, tassel_dense_h, dense_map_h = get_density_maps_horizontal(file_h, image_path_h, xml_path_h, save_density_path_h)
    names_all_h_v.append(name_h)
    shapes_all_h_v.append(shape_h)
    tasselCounts_all_h_v.append(tassels_h)
    tasselDensities_all_h_v.append(tassel_dense_h)
    dense_maps_all_h_v.append(dense_map_h)

In [None]:
# a few sanity checks: fraction of validation images whose tassel count equals
# the rounded density sum. Use the validation lists (suffix _v) filled by the
# loop above, not the lists left over from the training section.
np.mean(tasselCounts_all_h_v == np.round(tasselDensities_all_h_v))

In [None]:
# create a dataframe of the true counts
True_tasselcounts_df_horizontal_annot_valid = pd.DataFrame(zip(names_all_h_v, tasselCounts_all_h_v), columns = ['name', 'true_count'])

In [None]:
True_tasselcounts_df_horizontal_annot_valid.shape

In [None]:
True_tasselcounts_df_horizontal_annot_valid.head()

In [None]:
# save the true counts
True_tasselcounts_df_horizontal_annot_valid.to_csv("True_tassel_counts/all_data/horizontal_annotated_true_counts_valid.csv", index = False)

In [None]:
image_path_vn = 'Images_and_dense_maps_npy_valid/vertical_not_annotated_images/'
save_density_path_vn = image_path_vn

In [None]:
file_names_vn = [file.split(".")[0] for file in os.listdir(image_path_vn) if file.split(".")[0][-3:] != 'map']
file_names_vn.sort()

In [None]:
len(file_names_vn)

In [None]:
%%time
# get the density maps for all vertically NOT-annotated validation image files.
# Each call is unpacked into (name, shape, density-map sum, density map); the
# four lists below stay index-aligned with the sorted file_names_vn.
names_all_vn_v = []
shapes_all_vn_v = []
tasselDensities_all_vn_v = []
dense_maps_all_vn_v = []
for file_vn in file_names_vn:
    name_vn, shape_vn, tassel_dense_vn, dense_map_vn = get_density_maps_not_annotated(file_vn, image_path_vn, save_density_path_vn)
    names_all_vn_v.append(name_vn)
    shapes_all_vn_v.append(shape_vn)
    tasselDensities_all_vn_v.append(tassel_dense_vn)
    dense_maps_all_vn_v.append(dense_map_vn)
    

In [None]:
Vertical_not_annotated_df_valid = pd.DataFrame(zip(names_all_vn_v, tasselDensities_all_vn_v), columns = ['name', 'true_count'])

In [None]:
Vertical_not_annotated_df_valid.head()

In [None]:
# shape of the validation dataframe built above (not the training one)
Vertical_not_annotated_df_valid.shape

In [None]:
# save the true counts of the validation set; the dataframe written here must
# be the validation one, not the training Vertical_not_annotated_df
Vertical_not_annotated_df_valid.to_csv("True_tassel_counts/all_data/vertical_not_annotated_true_countsvalid.csv", index = False)

In [None]:
# move all the image and the xml files to a single location

va_images_dense = 'Images_and_dense_maps_npy_valid/vertical_annotated_images/'
vn_images_dense = 'Images_and_dense_maps_npy_valid/vertical_not_annotated_images/'
h_images_dense = 'Images_and_dense_maps_npy_valid/horizontal_annotated_images/'

In [None]:
shutil.copytree(va_images_dense, 'Preprocessed_valid_data/all_img_density_files/', dirs_exist_ok=True)
shutil.copytree(vn_images_dense, 'Preprocessed_valid_data/all_img_density_files/', dirs_exist_ok=True)
shutil.copytree(h_images_dense, 'Preprocessed_valid_data/all_img_density_files/', dirs_exist_ok=True)

In [None]:
# let's do this for a sample and then in the loop
valid_files_path = "Preprocessed_valid_data/all_img_density_files/"

In [None]:
valid_im_and_map_contents = os.listdir(valid_files_path)

# sort these - ALWAYS sort these as the order is always messed up on HCC
valid_im_and_map_contents.sort()

In [None]:
len(valid_im_and_map_contents)

In [None]:
# get only the names of the image (npy files)
valid_im_names = [item for item in valid_im_and_map_contents if item.split(".")[0][-3:] != 'map']

In [None]:
# how many? should be 32
len(valid_im_names)

In [None]:
%%time
# create the subwindows for all validation data; the three lists hold one
# entry per image, in the order of valid_im_names
catch_all_image_subwindows_valid = []
catch_all_dense_subwindows_valid = []
catch_valid_names = []
for image in valid_im_names:
    valid_ims, valid_maps, valid_names = create_subwindows_and_counts(image, valid_files_path, stride = 8, kernel_size = 32)
    catch_all_image_subwindows_valid.append(valid_ims)
    catch_all_dense_subwindows_valid.append(valid_maps)
    catch_valid_names.append(valid_names)

In [None]:
# sanity check names: both operands are plain Python lists, so == yields a
# single bool; the result is 1.0 when all names match in order, else 0.0
np.mean(valid_im_names == catch_valid_names)

In [None]:
# stack the images
valid_im_stack = np.vstack(catch_all_image_subwindows_valid)
print(valid_im_stack.shape)

# stack the subcounts
valid_count_stack = np.hstack(catch_all_dense_subwindows_valid)
print(valid_count_stack.shape)


In [None]:
# verify the stacking image by image: the sub-windows / counts of image i must
# occupy one contiguous slice of the stacked arrays. The slice start is
# accumulated from the real number of windows per image instead of assuming a
# fixed 12288, and the loop variable (not a constant index) selects the image.
offset = 0
for i in range(len(catch_all_image_subwindows_valid)):
    n_windows = len(catch_all_dense_subwindows_valid[i])
    print(np.mean(valid_im_stack[offset:offset + n_windows,:,:,:] == catch_all_image_subwindows_valid[i]), np.mean(valid_count_stack[offset:offset + n_windows,] == catch_all_dense_subwindows_valid[i]))
    offset += n_windows


In [None]:
# need to save these files
valid_save_path = 'final_valid_sub_windows_and_counts'
# np.save does not create missing folders, so make sure the target exists
os.makedirs(valid_save_path, exist_ok=True)

# save the sub images
np.save(valid_save_path + "/" + "valid_sub_windows.npy", valid_im_stack)
# save the sub counts
np.save(valid_save_path + "/" + "valid_sub_counts.npy", valid_count_stack)

##### Preprocessing for test data

Block 0103

In [None]:
# need the images saved as npy arrays and let's also get the density counts, JIC.

In [None]:
# list all the image and the xml files for the test data (block_0103).
# os.listdir returns entries in arbitrary order, so sort them exactly like the
# train listings before they are split.
test_contents_13 = sorted(os.listdir(block_0103))
# separate image and xml files
img_files_13, xml_files_13 = separate_img_xml(test_contents_13)
# separate annotated and not annotated images
annot_img_13, not_annot_img_13 = separate_annot_and_not(img_files_13, xml_files_13)
# separate vertical and horizontal annotated images
annot_vertical_images_13, annot_vertical_xmls_13, annot_horizontal_images_13, annot_horizontal_xmls_13 = separate_annnot_vertical_horizontal(annot_img_13, 
                                                                                                                                             xml_files_13, 6)
# check shapes
print(check_annot(annot_vertical_images_13, annot_vertical_xmls_13, annot_horizontal_images_13, annot_horizontal_xmls_13))

In [None]:
# store the vertically annotated images - test data, Block 13 (block_0103)
store_path = "Images_and_dense_maps_npy_test/vertical_annotated_images/Block_13"

# blk 13: keep the value returned for every stored image so it can be printed below
im_shapes_annot_vertical_13 = []
for item in annot_vertical_images_13:
    img_shape = store_images_as_np_arrays_vertical(block_0103, item, store_path)
    im_shapes_annot_vertical_13.append(img_shape)
print("Block 13 done!")

In [None]:
print(im_shapes_annot_vertical_13)

In [None]:
# store the vertically not-annotated images
store_path = "Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_13"

# blk 13
im_shapes_notannot_vertical_13 = []
for item in not_annot_img_13:
    img_shape = store_images_as_np_arrays_vertical(block_0103, item, store_path)
    im_shapes_notannot_vertical_13.append(img_shape)
print("Block 13 done!")

In [None]:
print(im_shapes_notannot_vertical_13)

In [None]:
# store the horizontally annotated images
store_path = "Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_13"

# blk 11 - the first horizontal image has a problem, needs to be rotated
im_shapes_annot_horizontal_13 = []
for item in annot_horizontal_images_13:
    img_shape = store_images_as_np_arrays_horizontal(block_0103, item, store_path)
    im_shapes_annot_horizontal_13.append(img_shape)
print("Block 13 done!")

In [None]:
print(im_shapes_annot_horizontal_13)

In [None]:
# xml_files_13

In [None]:
# Get the xml file paths in a single location
new_store_path = 'test_xml_files'
# make sure the destination is a directory: if it does not exist, shutil.copy
# would write every xml to one plain file called 'test_xml_files' instead
os.makedirs(new_store_path, exist_ok=True)
# blk 13
for file in xml_files_13:
    joined_path = os.path.join(block_0103, file)
    # store the file in new path
    shutil.copy(joined_path, new_store_path)

In [None]:
# get the outputs from this function
image_path = "Images_and_dense_maps_npy_test/vertical_annotated_images/Block_13/"
xml_path = 'test_xml_files'
save_density_path = image_path

In [None]:
file_names_v = [file.split(".")[0] for file in os.listdir(image_path) if file.split(".")[0][-3:] != 'map']
file_names_v.sort()

In [None]:
# file_names_v

In [None]:
%%time
names_all_va_v = []
shapes_all_va_v = []
tasselCounts_all_va_v = []
tasselDensities_all_va_v = []
dense_maps_all_va_v = []
for file_name in file_names_v:
    name_va, shape_va, tassels_va, tassel_dense_va, dense_map_va = get_density_maps_vertical(file_name, image_path, xml_path, save_density_path)
    names_all_va_v.append(name_va)
    shapes_all_va_v.append(shape_va)
    tasselCounts_all_va_v.append(tassels_va)
    tasselDensities_all_va_v.append(tassel_dense_va)
    dense_maps_all_va_v.append(dense_map_va)

In [None]:
# a few sanity checks
np.mean(tasselCounts_all_va_v == np.round(tasselDensities_all_va_v))

In [None]:
# create a dataframe of the true counts
True_tasselcounts_df_vertical_annot_test_13 = pd.DataFrame(zip(names_all_va_v, tasselCounts_all_va_v), columns = ['name', 'true_count'])

In [None]:
True_tasselcounts_df_vertical_annot_test_13.shape

In [None]:
True_tasselcounts_df_vertical_annot_test_13.head()

In [None]:
# get the outputs from this function
image_path_h = "Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_13"
xml_path_h = 'test_xml_files'
save_density_path_h = image_path_h

In [None]:
file_names_h = [file.split(".")[0] for file in os.listdir(image_path_h) if file.split(".")[0][-3:] != 'map']
file_names_h.sort()

In [None]:
# file_names_h

In [None]:
%%time
names_all_h_v = []
shapes_all_h_v = []
tasselCounts_all_h_v = []
tasselDensities_all_h_v = []
dense_maps_all_h_v = []
for file_h in file_names_h:
    name_h, shape_h, tassels_h, tassel_dense_h, dense_map_h = get_density_maps_horizontal(file_h, image_path_h, xml_path_h, save_density_path_h)
    names_all_h_v.append(name_h)
    shapes_all_h_v.append(shape_h)
    tasselCounts_all_h_v.append(tassels_h)
    tasselDensities_all_h_v.append(tassel_dense_h)
    dense_maps_all_h_v.append(dense_map_h)

In [None]:
# a few sanity checks
np.mean(tasselCounts_all_h_v == np.round(tasselDensities_all_h_v))

In [None]:
# create a dataframe of the true counts
True_tasselcounts_df_horizontal_annot_test_13 = pd.DataFrame(zip(names_all_h_v, tasselCounts_all_h_v), columns = ['name', 'true_count'])

In [None]:
True_tasselcounts_df_horizontal_annot_test_13.shape

In [None]:
True_tasselcounts_df_horizontal_annot_test_13.head()

In [None]:
image_path_vn = 'Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_13'
save_density_path_vn = image_path_vn

In [None]:
file_names_vn = [file.split(".")[0] for file in os.listdir(image_path_vn) if file.split(".")[0][-3:] != 'map']
file_names_vn.sort()

In [None]:
len(file_names_vn)

In [None]:
file_names_vn

In [None]:
%%time
# get the density maps for all vertically annotated image files
names_all_vn_v = []
shapes_all_vn_v = []
tasselDensities_all_vn_v = []
dense_maps_all_vn_v = []
for file_vn in file_names_vn:
    name_vn, shape_vn, tassel_dense_vn, dense_map_vn = get_density_maps_not_annotated(file_vn, image_path_vn, save_density_path_vn)
    names_all_vn_v.append(name_vn)
    shapes_all_vn_v.append(shape_vn)
    tasselDensities_all_vn_v.append(tassel_dense_vn)
    dense_maps_all_vn_v.append(dense_map_vn)
    

In [None]:
Vertical_not_annotated_df_test_13 = pd.DataFrame(zip(names_all_vn_v, tasselDensities_all_vn_v), columns = ['name', 'true_count'])

In [None]:
Vertical_not_annotated_df_test_13.head()

In [None]:
Vertical_not_annotated_df_test_13.shape

In [None]:
# stack and save the dataframe of testset true counts
test_data_13 = pd.concat((Vertical_not_annotated_df_test_13, True_tasselcounts_df_vertical_annot_test_13, True_tasselcounts_df_horizontal_annot_test_13), axis = 0)

In [None]:
test_data_13.head()

In [None]:
test_data_13.shape

In [None]:
# save this dataframe (true counts of all Block 13 test images)
test_data_13.to_csv("True_tassel_counts/test_data/true_test_counts_blk_13.csv", index = False)

In [None]:
# move all the image and the xml files to a single location

va_images_dense = 'Images_and_dense_maps_npy_test/vertical_annotated_images/Block_13'
vn_images_dense = 'Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_13'
h_images_dense = 'Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_13'

In [None]:
shutil.copytree(va_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_13', dirs_exist_ok=True)
shutil.copytree(vn_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_13', dirs_exist_ok=True)
shutil.copytree(h_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_13', dirs_exist_ok=True)

In [None]:
test_files_path_13 = "Preprocessed_test_data/all_img_density_files/Block_13"

In [None]:
test_im_and_map_contents_13 = os.listdir(test_files_path_13)

# sort these - ALWAYS sort these as the order is always messed up on HCC
test_im_and_map_contents_13.sort()

In [None]:
len(test_im_and_map_contents_13)

In [None]:
# get only the names of the image (npy files)
test_im_names_13 = [item for item in test_im_and_map_contents_13 if item.split(".")[0][-3:] != 'map']

In [None]:
# how many? should be 32
len(test_im_names_13)

In [None]:
# test_im_names_13

In [None]:
%%time
# get the subwindows and counts for test data (Block 13)

# destination folder for the per-image stacks; created up front because
# np.save does not create missing folders
save_path = 'final_test_sub_windows_and_counts/Block_13'
os.makedirs(save_path, exist_ok=True)

im_names = []
for image in test_im_names_13:
    test_ims, test_maps, test_names = create_subwindows_and_counts(image, test_files_path_13, stride = 8, kernel_size = 32)
    test_im_array = np.array(test_ims)
    test_map_array = np.array(test_maps)
    print(image, test_im_array.shape, test_map_array.shape)
    # save the arrays
    # image stack
    np.save(save_path + '/' + 'test_ims_' + image, test_im_array)
    # count stack: write the per-window counts, not the image stack a second time
    np.save(save_path + '/' + 'test_counts_' + image, test_map_array)
    im_names.append(test_names)

Block 0104

In [None]:
# need the images saved as npy arrays and let's also get the density counts, JIC.

In [None]:
# list all the image and the xml files for the test data (block_0104).
# os.listdir returns entries in arbitrary order, so sort them exactly like the
# train listings before they are split.
test_contents_14 = sorted(os.listdir(block_0104))
# separate image and xml files
img_files_14, xml_files_14 = separate_img_xml(test_contents_14)
# separate annotated and not annotated images
annot_img_14, not_annot_img_14 = separate_annot_and_not(img_files_14, xml_files_14)
# separate vertical and horizontal annotated images
annot_vertical_images_14, annot_vertical_xmls_14, annot_horizontal_images_14, annot_horizontal_xmls_14 = separate_annnot_vertical_horizontal(annot_img_14, 
                                                                                                                                             xml_files_14, 6)
# check shapes
print(check_annot(annot_vertical_images_14, annot_vertical_xmls_14, annot_horizontal_images_14, annot_horizontal_xmls_14))

In [None]:
# store the vertically annotated images - test data, Block 14 (block_0104)
store_path = "Images_and_dense_maps_npy_test/vertical_annotated_images/Block_14"

# blk 14: keep the value returned for every stored image so it can be printed below
im_shapes_annot_vertical_14 = []
for item in annot_vertical_images_14:
    img_shape = store_images_as_np_arrays_vertical(block_0104, item, store_path)
    im_shapes_annot_vertical_14.append(img_shape)
print("Block 14 done!")

In [None]:
print(im_shapes_annot_vertical_14)

In [None]:
# store the vertically not-annotated images
store_path = "Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_14"

# blk 13
im_shapes_notannot_vertical_14 = []
for item in not_annot_img_14:
    img_shape = store_images_as_np_arrays_vertical(block_0104, item, store_path)
    im_shapes_notannot_vertical_14.append(img_shape)
print("Block 14 done!")

In [None]:
print(im_shapes_notannot_vertical_14)

In [None]:
# store the horizontally annotated images - test data, Block 14 (block_0104)
store_path = "Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_14"

# blk 11 - the first horizontal image has a problem, needs to be rotated
im_shapes_annot_horizontal_14 = []
for item in annot_horizontal_images_14:
    img_shape = store_images_as_np_arrays_horizontal(block_0104, item, store_path)
    im_shapes_annot_horizontal_14.append(img_shape)
# progress message must name the block that was actually processed
print("Block 14 done!")

In [None]:
print(im_shapes_annot_horizontal_14)

In [None]:
# xml_files_14

In [None]:
# Get the xml file paths in a single location
new_store_path = 'test_xml_files'
# make sure the destination is a directory: if it does not exist, shutil.copy
# would write every xml to one plain file called 'test_xml_files' instead
os.makedirs(new_store_path, exist_ok=True)
# blk 14
for file in xml_files_14:
    joined_path = os.path.join(block_0104, file)
    # store the file in new path
    shutil.copy(joined_path, new_store_path)

In [None]:
# get the outputs from this function
image_path = "Images_and_dense_maps_npy_test/vertical_annotated_images/Block_14/"
xml_path = 'test_xml_files'
save_density_path = image_path

In [None]:
file_names_v = [file.split(".")[0] for file in os.listdir(image_path) if file.split(".")[0][-3:] != 'map']
file_names_v.sort()

In [None]:
# file_names_v

In [None]:
%%time
names_all_va_v = []
shapes_all_va_v = []
tasselCounts_all_va_v = []
tasselDensities_all_va_v = []
dense_maps_all_va_v = []
for file_name in file_names_v:
    name_va, shape_va, tassels_va, tassel_dense_va, dense_map_va = get_density_maps_vertical(file_name, image_path, xml_path, save_density_path)
    names_all_va_v.append(name_va)
    shapes_all_va_v.append(shape_va)
    tasselCounts_all_va_v.append(tassels_va)
    tasselDensities_all_va_v.append(tassel_dense_va)
    dense_maps_all_va_v.append(dense_map_va)

In [None]:
# a few sanity checks
np.mean(tasselCounts_all_va_v == np.round(tasselDensities_all_va_v))

In [None]:
# create a dataframe of the true counts
True_tasselcounts_df_vertical_annot_test_13 = pd.DataFrame(zip(names_all_va_v, tasselCounts_all_va_v), columns = ['name', 'true_count'])

In [None]:
True_tasselcounts_df_vertical_annot_test_13.shape

In [None]:
True_tasselcounts_df_vertical_annot_test_13.head()

In [None]:
# get the outputs from this function
image_path_h = "Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_14"
xml_path_h = 'test_xml_files'
save_density_path_h = image_path_h

In [None]:
file_names_h = [file.split(".")[0] for file in os.listdir(image_path_h) if file.split(".")[0][-3:] != 'map']
file_names_h.sort()

In [None]:
# file_names_h

In [None]:
%%time
names_all_h_v = []
shapes_all_h_v = []
tasselCounts_all_h_v = []
tasselDensities_all_h_v = []
dense_maps_all_h_v = []
for file_h in file_names_h:
    name_h, shape_h, tassels_h, tassel_dense_h, dense_map_h = get_density_maps_horizontal(file_h, image_path_h, xml_path_h, save_density_path_h)
    names_all_h_v.append(name_h)
    shapes_all_h_v.append(shape_h)
    tasselCounts_all_h_v.append(tassels_h)
    tasselDensities_all_h_v.append(tassel_dense_h)
    dense_maps_all_h_v.append(dense_map_h)

In [None]:
# a few sanity checks
np.mean(tasselCounts_all_h_v == np.round(tasselDensities_all_h_v))

In [None]:
# create a dataframe of the true counts
True_tasselcounts_df_horizontal_annot_test_13 = pd.DataFrame(zip(names_all_h_v, tasselCounts_all_h_v), columns = ['name', 'true_count'])

In [None]:
True_tasselcounts_df_horizontal_annot_test_13.shape

In [None]:
True_tasselcounts_df_horizontal_annot_test_13.head()

In [None]:
image_path_vn = 'Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_14'
save_density_path_vn = image_path_vn

In [None]:
file_names_vn = [file.split(".")[0] for file in os.listdir(image_path_vn) if file.split(".")[0][-3:] != 'map']
file_names_vn.sort()

In [None]:
len(file_names_vn)

In [None]:
file_names_vn

In [None]:
%%time
# get the density maps for all vertically annotated image files
names_all_vn_v = []
shapes_all_vn_v = []
tasselDensities_all_vn_v = []
dense_maps_all_vn_v = []
for file_vn in file_names_vn:
    name_vn, shape_vn, tassel_dense_vn, dense_map_vn = get_density_maps_not_annotated(file_vn, image_path_vn, save_density_path_vn)
    names_all_vn_v.append(name_vn)
    shapes_all_vn_v.append(shape_vn)
    tasselDensities_all_vn_v.append(tassel_dense_vn)
    dense_maps_all_vn_v.append(dense_map_vn)
    

In [None]:
Vertical_not_annotated_df_test_13 = pd.DataFrame(zip(names_all_vn_v, tasselDensities_all_vn_v), columns = ['name', 'true_count'])

In [None]:
Vertical_not_annotated_df_test_13.head()

In [None]:
Vertical_not_annotated_df_test_13.shape

In [None]:
# stack and save the dataframe of testset true counts
test_data_13 = pd.concat((Vertical_not_annotated_df_test_13, True_tasselcounts_df_vertical_annot_test_13, True_tasselcounts_df_horizontal_annot_test_13), axis = 0)

In [None]:
test_data_13.head()

In [None]:
test_data_13.shape

In [None]:
# saev this dataframe
test_data_13.to_csv("True_tassel_counts/test_data/true_test_counts_blk_14.csv", index = False)

In [None]:
# move all the image and the xml files to a single location

va_images_dense = 'Images_and_dense_maps_npy_test/vertical_annotated_images/Block_14'
vn_images_dense = 'Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_14'
h_images_dense = 'Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_14'

In [None]:
shutil.copytree(va_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_14', dirs_exist_ok=True)
shutil.copytree(vn_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_14', dirs_exist_ok=True)
shutil.copytree(h_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_14', dirs_exist_ok=True)

In [None]:
test_files_path_14 = "Preprocessed_test_data/all_img_density_files/Block_14"

In [None]:
test_im_and_map_contents_14 = os.listdir(test_files_path_14)

# sort these - ALWAYS sort these as the order is always messed up on HCC
test_im_and_map_contents_14.sort()

In [None]:
len(test_im_and_map_contents_14)

In [None]:
# get only the names of the image (npy files)
test_im_names_14 = [item for item in test_im_and_map_contents_14 if item.split(".")[0][-3:] != 'map']

In [None]:
# how many? should be 32
len(test_im_names_14)

In [None]:
# test_im_names_14

In [None]:
%%time
# get the subwindows and counts for test data (Block 14)

# destination folder for the per-image stacks; created up front because
# np.save does not create missing folders
save_path = 'final_test_sub_windows_and_counts/Block_14'
os.makedirs(save_path, exist_ok=True)

im_names = []
for image in test_im_names_14:
    test_ims, test_maps, test_names = create_subwindows_and_counts(image, test_files_path_14, stride = 8, kernel_size = 32)
    test_im_array = np.array(test_ims)
    test_map_array = np.array(test_maps)
    print(image, test_im_array.shape, test_map_array.shape)
    # save the arrays
    # image stack
    np.save(save_path + '/' + 'test_ims_' + image, test_im_array)
    # count stack: write the per-window counts, not the image stack a second time
    np.save(save_path + '/' + 'test_counts_' + image, test_map_array)
    im_names.append(test_names)

Block 0105

In [None]:
# list all the image and the xml files for the test data (block_0105).
# os.listdir returns entries in arbitrary order, so sort them exactly like the
# train listings before they are split.
test_contents_15 = sorted(os.listdir(block_0105))
# separate image and xml files
img_files_15, xml_files_15 = separate_img_xml(test_contents_15)
# separate annotated and not annotated images
annot_img_15, not_annot_img_15 = separate_annot_and_not(img_files_15, xml_files_15)
# separate vertical and horizontal annotated images
annot_vertical_images_15, annot_vertical_xmls_15, annot_horizontal_images_15, annot_horizontal_xmls_15 = separate_annnot_vertical_horizontal(annot_img_15, 
                                                                                                                                             xml_files_15, 6)
# check shapes
print(check_annot(annot_vertical_images_15, annot_vertical_xmls_15, annot_horizontal_images_15, annot_horizontal_xmls_15))

In [None]:
# store the vertically annotated images - test data, Block 15 (block_0105)
store_path = "Images_and_dense_maps_npy_test/vertical_annotated_images/Block_15"

# blk 15: keep the value returned for every stored image so it can be printed below
im_shapes_annot_vertical_15 = []
for item in annot_vertical_images_15:
    img_shape = store_images_as_np_arrays_vertical(block_0105, item, store_path)
    im_shapes_annot_vertical_15.append(img_shape)
print("Block 15 done!")

In [None]:
print(im_shapes_annot_vertical_15)

In [None]:
# store the vertically not-annotated images
store_path = "Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_15"

# blk 15
im_shapes_notannot_vertical_15 = []
for item in not_annot_img_15:
    img_shape = store_images_as_np_arrays_vertical(block_0105, item, store_path)
    im_shapes_notannot_vertical_15.append(img_shape)
print("Block 15 done!")

In [None]:
print(im_shapes_notannot_vertical_15)

In [None]:
# store the horizontally annotated images
store_path = "Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_15"

# blk 15 
im_shapes_annot_horizontal_15 = []
for item in annot_horizontal_images_15:
    img_shape = store_images_as_np_arrays_horizontal(block_0105, item, store_path)
    im_shapes_annot_horizontal_15.append(img_shape)
print("Block 15 done!")

In [None]:
print(im_shapes_annot_horizontal_15)

In [None]:
# xml_files_15

In [None]:
# Get the xml file paths in a single location
new_store_path = 'test_xml_files'
# make sure the destination is a directory: if it does not exist, shutil.copy
# would write every xml to one plain file called 'test_xml_files' instead
os.makedirs(new_store_path, exist_ok=True)
# blk 15
for file in xml_files_15:
    joined_path = os.path.join(block_0105, file)
    # store the file in new path
    shutil.copy(joined_path, new_store_path)

In [None]:
# get the outputs from this function
image_path = "Images_and_dense_maps_npy_test/vertical_annotated_images/Block_15/"
xml_path = 'test_xml_files'
save_density_path = image_path

In [None]:
file_names_v = [file.split(".")[0] for file in os.listdir(image_path) if file.split(".")[0][-3:] != 'map']
file_names_v.sort()

In [None]:
# file_names_v

In [None]:
%%time
# Call get_density_maps_vertical for each vertically annotated file and spread
# its five return values over five parallel lists.
names_all_va_v, shapes_all_va_v, tasselCounts_all_va_v = [], [], []
tasselDensities_all_va_v, dense_maps_all_va_v = [], []
va_columns = (names_all_va_v, shapes_all_va_v, tasselCounts_all_va_v,
              tasselDensities_all_va_v, dense_maps_all_va_v)
for file_name in file_names_v:
    name_va, shape_va, tassels_va, tassel_dense_va, dense_map_va = get_density_maps_vertical(
        file_name, image_path, xml_path, save_density_path)
    va_row = (name_va, shape_va, tassels_va, tassel_dense_va, dense_map_va)
    for va_column, va_value in zip(va_columns, va_row):
        va_column.append(va_value)

In [None]:
# a few sanity checks
np.mean(tasselCounts_all_va_v == np.round(tasselDensities_all_va_v))

In [None]:
# One row per vertically annotated image: its name and its annotated tassel count.
True_tasselcounts_df_vertical_annot_test_15 = pd.DataFrame(
    list(zip(names_all_va_v, tasselCounts_all_va_v)),
    columns=['name', 'true_count'],
)

In [None]:
True_tasselcounts_df_vertical_annot_test_15.shape

In [None]:
True_tasselcounts_df_vertical_annot_test_15.head()

In [None]:
# get the outputs from this function
image_path_h = "Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_15"
xml_path_h = 'test_xml_files'
save_density_path_h = image_path_h

In [None]:
# Base names (text before the first ".") of the entries in image_path_h, leaving
# out any whose base name ends in 'map'; sorted for a fixed order.
file_names_h = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path_h))
    if not stem.endswith('map')
)

In [None]:
# file_names_h

In [None]:
%%time
# Call get_density_maps_horizontal for each horizontally annotated file and
# spread its five return values over five parallel lists.
names_all_h_v, shapes_all_h_v, tasselCounts_all_h_v = [], [], []
tasselDensities_all_h_v, dense_maps_all_h_v = [], []
h_columns = (names_all_h_v, shapes_all_h_v, tasselCounts_all_h_v,
             tasselDensities_all_h_v, dense_maps_all_h_v)
for file_h in file_names_h:
    name_h, shape_h, tassels_h, tassel_dense_h, dense_map_h = get_density_maps_horizontal(
        file_h, image_path_h, xml_path_h, save_density_path_h)
    h_row = (name_h, shape_h, tassels_h, tassel_dense_h, dense_map_h)
    for h_column, h_value in zip(h_columns, h_row):
        h_column.append(h_value)

In [None]:
# a few sanity checks
np.mean(tasselCounts_all_h_v == np.round(tasselDensities_all_h_v))

In [None]:
# One row per horizontally annotated image: its name and its annotated tassel count.
True_tasselcounts_df_horizontal_annot_test_15 = pd.DataFrame(
    list(zip(names_all_h_v, tasselCounts_all_h_v)),
    columns=['name', 'true_count'],
)

In [None]:
True_tasselcounts_df_horizontal_annot_test_15.shape

In [None]:
True_tasselcounts_df_horizontal_annot_test_15.head()

In [None]:
image_path_vn = 'Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_15'
save_density_path_vn = image_path_vn

In [None]:
# Base names (text before the first ".") of the entries in image_path_vn, leaving
# out any whose base name ends in 'map'; sorted for a fixed order.
file_names_vn = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path_vn))
    if not stem.endswith('map')
)

In [None]:
len(file_names_vn)

In [None]:
file_names_vn

In [None]:
%%time
# Density maps for the vertical NOT-annotated image files: call
# get_density_maps_not_annotated per file and spread its four return values
# over four parallel lists.
names_all_vn_v, shapes_all_vn_v = [], []
tasselDensities_all_vn_v, dense_maps_all_vn_v = [], []
vn_columns = (names_all_vn_v, shapes_all_vn_v, tasselDensities_all_vn_v, dense_maps_all_vn_v)
for file_vn in file_names_vn:
    name_vn, shape_vn, tassel_dense_vn, dense_map_vn = get_density_maps_not_annotated(
        file_vn, image_path_vn, save_density_path_vn)
    vn_row = (name_vn, shape_vn, tassel_dense_vn, dense_map_vn)
    for vn_column, vn_value in zip(vn_columns, vn_row):
        vn_column.append(vn_value)
    

In [None]:
# Not-annotated images have no xml count, so the density value returned by the
# helper is what goes into the 'true_count' column.
Vertical_not_annotated_df_test_15 = pd.DataFrame(
    list(zip(names_all_vn_v, tasselDensities_all_vn_v)),
    columns=['name', 'true_count'],
)

In [None]:
Vertical_not_annotated_df_test_15.head()

In [None]:
Vertical_not_annotated_df_test_15.shape

In [None]:
# Stack the three per-category count tables row-wise (original row indices are kept).
count_frames_15 = [
    Vertical_not_annotated_df_test_15,
    True_tasselcounts_df_vertical_annot_test_15,
    True_tasselcounts_df_horizontal_annot_test_15,
]
test_data_15 = pd.concat(count_frames_15, axis=0)

In [None]:
test_data_15.head()

In [None]:
test_data_15.shape

In [None]:
# save this dataframe
test_data_15.to_csv("True_tassel_counts/test_data/true_test_counts_blk_15.csv", index = False)

In [None]:
# move all the image and the xml files to a single location

va_images_dense = 'Images_and_dense_maps_npy_test/vertical_annotated_images/Block_15'
vn_images_dense = 'Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_15'
h_images_dense = 'Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_15'

In [None]:
# Copy the three per-category folders into one combined Block_15 folder.
# dirs_exist_ok=True lets the second and third call reuse the existing target
# (files with the same name would be overwritten).
shutil.copytree(va_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_15', dirs_exist_ok=True)
shutil.copytree(vn_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_15', dirs_exist_ok=True)
shutil.copytree(h_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_15', dirs_exist_ok=True)

In [None]:
test_files_path_15 = "Preprocessed_test_data/all_img_density_files/Block_15"

In [None]:
# os.listdir gives no ordering guarantee (the order was seen to be scrambled on
# HCC), so the listing is always sorted before use.
test_im_and_map_contents_15 = sorted(os.listdir(test_files_path_15))

In [None]:
# test_im_and_map_contents_15

In [None]:
len(test_im_and_map_contents_15)

In [None]:
# Keep the full file names whose base name (text before the first ".") does
# not end in 'map', i.e. drop the density-map files.
test_im_names_15 = [
    entry
    for entry in test_im_and_map_contents_15
    if not entry.partition(".")[0].endswith('map')
]

In [None]:
# how many? should be 32
len(test_im_names_15)

In [None]:
# test_im_names_15

In [None]:
%%time
# Build the sub-windows and their counts for every test image of block 0105
# ("15") and save both stacks as .npy files.

save_path = 'final_test_sub_windows_and_counts/Block_15'
# np.save does not create missing folders; make sure the target exists
os.makedirs(save_path, exist_ok=True)

im_names = []
for image in test_im_names_15:
    test_ims, test_maps, test_names = create_subwindows_and_counts(image, test_files_path_15, stride = 8, kernel_size = 32)
    test_im_array = np.array(test_ims)
    test_map_array = np.array(test_maps)
    print(image, test_im_array.shape, test_map_array.shape)
    # image stack
    np.save(save_path + '/' + 'test_ims_' + image, test_im_array)
    # count stack - must be the map/count array; the image stack used to be
    # written here by mistake, so the saved "counts" were just a copy of the images
    np.save(save_path + '/' + 'test_counts_' + image, test_map_array)
    im_names.append(test_names)

Block 0106

In [None]:
# list all the image and the xml files of test block 0106 ("16" in this notebook).
# os.listdir returns entries in arbitrary order, so sort them - the train blocks
# are sorted the same way before being split.
test_contents_16 = sorted(os.listdir(block_0106))
# separate image and xml files
img_files_16, xml_files_16 = separate_img_xml(test_contents_16)
# separate annotated and not annotated images
annot_img_16, not_annot_img_16 = separate_annot_and_not(img_files_16, xml_files_16)
# separate vertical and horizontal annotated images
# NOTE(review): the trailing 6 is passed straight to the helper; its meaning is defined there
annot_vertical_images_16, annot_vertical_xmls_16, annot_horizontal_images_16, annot_horizontal_xmls_16 = separate_annnot_vertical_horizontal(annot_img_16, 
                                                                                                                                             xml_files_16, 6)
# check shapes
print(check_annot(annot_vertical_images_16, annot_vertical_xmls_16, annot_horizontal_images_16, annot_horizontal_xmls_16))

In [None]:
# Block 0106 ("16"), test data: pass every vertically annotated image through
# the vertical storage helper and keep what the helper returns per image.
store_path = "Images_and_dense_maps_npy_test/vertical_annotated_images/Block_16"

im_shapes_annot_vertical_16 = [
    store_images_as_np_arrays_vertical(block_0106, image_file, store_path)
    for image_file in annot_vertical_images_16
]
print("Block 16 done!")

In [None]:
print(im_shapes_annot_vertical_16)

In [None]:
# Block 0106 ("16"): pass every not-annotated image through the vertical storage
# helper (target folder: store_path) and keep what the helper returns per image.
store_path = "Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_16"

im_shapes_notannot_vertical_16 = [
    store_images_as_np_arrays_vertical(block_0106, image_file, store_path)
    for image_file in not_annot_img_16
]
print("Block 16 done!")

In [None]:
print(im_shapes_notannot_vertical_16)

In [None]:
# Block 0106 ("16"): pass every horizontally annotated image through the
# horizontal storage helper and keep what the helper returns per image.
store_path = "Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_16"

im_shapes_annot_horizontal_16 = [
    store_images_as_np_arrays_horizontal(block_0106, image_file, store_path)
    for image_file in annot_horizontal_images_16
]
print("Block 16 done!")

In [None]:
print(im_shapes_annot_horizontal_16)

In [None]:
# xml_files_16

In [None]:
# Copy the xml files of block 0106 ("16") into one shared folder.
new_store_path = 'test_xml_files'
# shutil.copy treats a destination that does not exist as a *file* name, so
# without the folder every xml would overwrite one file called 'test_xml_files'.
os.makedirs(new_store_path, exist_ok=True)
for file in xml_files_16:
    joined_path = os.path.join(block_0106, file)
    # copy the file into the shared folder (same base name)
    shutil.copy(joined_path, new_store_path)

In [None]:
# get the outputs from this function
image_path = "Images_and_dense_maps_npy_test/vertical_annotated_images/Block_16/"
xml_path = 'test_xml_files'
save_density_path = image_path

In [None]:
# Base names (text before the first ".") of the entries in image_path, leaving
# out any whose base name ends in 'map'; sorted for a fixed order.
file_names_v = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path))
    if not stem.endswith('map')
)

In [None]:
# file_names_v

In [None]:
%%time
# Call get_density_maps_vertical for each vertically annotated file and spread
# its five return values over five parallel lists.
names_all_va_v, shapes_all_va_v, tasselCounts_all_va_v = [], [], []
tasselDensities_all_va_v, dense_maps_all_va_v = [], []
va_columns = (names_all_va_v, shapes_all_va_v, tasselCounts_all_va_v,
              tasselDensities_all_va_v, dense_maps_all_va_v)
for file_name in file_names_v:
    name_va, shape_va, tassels_va, tassel_dense_va, dense_map_va = get_density_maps_vertical(
        file_name, image_path, xml_path, save_density_path)
    va_row = (name_va, shape_va, tassels_va, tassel_dense_va, dense_map_va)
    for va_column, va_value in zip(va_columns, va_row):
        va_column.append(va_value)

In [None]:
# a few sanity checks
np.mean(tasselCounts_all_va_v == np.round(tasselDensities_all_va_v))

In [None]:
# One row per vertically annotated image: its name and its annotated tassel count.
True_tasselcounts_df_vertical_annot_test_16 = pd.DataFrame(
    list(zip(names_all_va_v, tasselCounts_all_va_v)),
    columns=['name', 'true_count'],
)

In [None]:
True_tasselcounts_df_vertical_annot_test_16.shape

In [None]:
True_tasselcounts_df_vertical_annot_test_16.head()

In [None]:
# get the outputs from this function
image_path_h = "Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_16"
xml_path_h = 'test_xml_files'
save_density_path_h = image_path_h

In [None]:
# Base names (text before the first ".") of the entries in image_path_h, leaving
# out any whose base name ends in 'map'; sorted for a fixed order.
file_names_h = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path_h))
    if not stem.endswith('map')
)

In [None]:
# file_names_h

In [None]:
%%time
# Call get_density_maps_horizontal for each horizontally annotated file and
# spread its five return values over five parallel lists.
names_all_h_v, shapes_all_h_v, tasselCounts_all_h_v = [], [], []
tasselDensities_all_h_v, dense_maps_all_h_v = [], []
h_columns = (names_all_h_v, shapes_all_h_v, tasselCounts_all_h_v,
             tasselDensities_all_h_v, dense_maps_all_h_v)
for file_h in file_names_h:
    name_h, shape_h, tassels_h, tassel_dense_h, dense_map_h = get_density_maps_horizontal(
        file_h, image_path_h, xml_path_h, save_density_path_h)
    h_row = (name_h, shape_h, tassels_h, tassel_dense_h, dense_map_h)
    for h_column, h_value in zip(h_columns, h_row):
        h_column.append(h_value)

In [None]:
# a few sanity checks
np.mean(tasselCounts_all_h_v == np.round(tasselDensities_all_h_v))

In [None]:
# One row per horizontally annotated image: its name and its annotated tassel count.
True_tasselcounts_df_horizontal_annot_test_16 = pd.DataFrame(
    list(zip(names_all_h_v, tasselCounts_all_h_v)),
    columns=['name', 'true_count'],
)

In [None]:
True_tasselcounts_df_horizontal_annot_test_16.shape

In [None]:
True_tasselcounts_df_horizontal_annot_test_16.head()

In [None]:
image_path_vn = 'Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_16'
save_density_path_vn = image_path_vn

In [None]:
# Base names (text before the first ".") of the entries in image_path_vn, leaving
# out any whose base name ends in 'map'; sorted for a fixed order.
file_names_vn = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path_vn))
    if not stem.endswith('map')
)

In [None]:
len(file_names_vn)

In [None]:
file_names_vn

In [None]:
%%time
# Density maps for the vertical NOT-annotated image files: call
# get_density_maps_not_annotated per file and spread its four return values
# over four parallel lists.
names_all_vn_v, shapes_all_vn_v = [], []
tasselDensities_all_vn_v, dense_maps_all_vn_v = [], []
vn_columns = (names_all_vn_v, shapes_all_vn_v, tasselDensities_all_vn_v, dense_maps_all_vn_v)
for file_vn in file_names_vn:
    name_vn, shape_vn, tassel_dense_vn, dense_map_vn = get_density_maps_not_annotated(
        file_vn, image_path_vn, save_density_path_vn)
    vn_row = (name_vn, shape_vn, tassel_dense_vn, dense_map_vn)
    for vn_column, vn_value in zip(vn_columns, vn_row):
        vn_column.append(vn_value)
    

In [None]:
# Not-annotated images have no xml count, so the density value returned by the
# helper is what goes into the 'true_count' column.
Vertical_not_annotated_df_test_16 = pd.DataFrame(
    list(zip(names_all_vn_v, tasselDensities_all_vn_v)),
    columns=['name', 'true_count'],
)

In [None]:
Vertical_not_annotated_df_test_16.head()

In [None]:
Vertical_not_annotated_df_test_16.shape

In [None]:
# Stack the three per-category count tables row-wise (original row indices are kept).
count_frames_16 = [
    Vertical_not_annotated_df_test_16,
    True_tasselcounts_df_vertical_annot_test_16,
    True_tasselcounts_df_horizontal_annot_test_16,
]
test_data_16 = pd.concat(count_frames_16, axis=0)

In [None]:
test_data_16.head()

In [None]:
test_data_16.shape

In [None]:
# save this dataframe
test_data_16.to_csv("True_tassel_counts/test_data/true_test_counts_blk_16.csv", index = False)

In [None]:
# move all the image and the xml files to a single location

va_images_dense = 'Images_and_dense_maps_npy_test/vertical_annotated_images/Block_16'
vn_images_dense = 'Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_16'
h_images_dense = 'Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_16'

In [None]:
# Copy the three per-category folders into one combined Block_16 folder.
# dirs_exist_ok=True lets the second and third call reuse the existing target
# (files with the same name would be overwritten).
shutil.copytree(va_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_16', dirs_exist_ok=True)
shutil.copytree(vn_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_16', dirs_exist_ok=True)
shutil.copytree(h_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_16', dirs_exist_ok=True)

In [None]:
test_files_path_16 = "Preprocessed_test_data/all_img_density_files/Block_16"

In [None]:
# os.listdir gives no ordering guarantee (the order was seen to be scrambled on
# HCC), so the listing is always sorted before use.
test_im_and_map_contents_16 = sorted(os.listdir(test_files_path_16))

In [None]:
# test_im_and_map_contents_16

In [None]:
len(test_im_and_map_contents_16)

In [None]:
# Keep the full file names whose base name (text before the first ".") does
# not end in 'map', i.e. drop the density-map files.
test_im_names_16 = [
    entry
    for entry in test_im_and_map_contents_16
    if not entry.partition(".")[0].endswith('map')
]

In [None]:
# how many? should be 32
len(test_im_names_16)

In [None]:
# test_im_names_16

In [None]:
%%time
# Build the sub-windows and their counts for every test image of block 0106
# ("16") and save both stacks as .npy files.

save_path = 'final_test_sub_windows_and_counts/Block_16'
# np.save does not create missing folders; make sure the target exists
os.makedirs(save_path, exist_ok=True)

im_names = []
for image in test_im_names_16:
    test_ims, test_maps, test_names = create_subwindows_and_counts(image, test_files_path_16, stride = 8, kernel_size = 32)
    test_im_array = np.array(test_ims)
    test_map_array = np.array(test_maps)
    print(image, test_im_array.shape, test_map_array.shape)
    # image stack
    np.save(save_path + '/' + 'test_ims_' + image, test_im_array)
    # count stack - must be the map/count array; the image stack used to be
    # written here by mistake, so the saved "counts" were just a copy of the images
    np.save(save_path + '/' + 'test_counts_' + image, test_map_array)
    im_names.append(test_names)

Block 0201

In [None]:
# list all the image and the xml files of test block 0201 ("21" in this notebook).
# os.listdir returns entries in arbitrary order, so sort them - the train blocks
# are sorted the same way before being split.
test_contents_21 = sorted(os.listdir(block_0201))
# separate image and xml files
img_files_21, xml_files_21 = separate_img_xml(test_contents_21)
# separate annotated and not annotated images
annot_img_21, not_annot_img_21 = separate_annot_and_not(img_files_21, xml_files_21)
# separate vertical and horizontal annotated images
# NOTE(review): the trailing 5 is passed straight to the helper; its meaning is defined there
annot_vertical_images_21, annot_vertical_xmls_21, annot_horizontal_images_21, annot_horizontal_xmls_21 = separate_annnot_vertical_horizontal(annot_img_21, 
                                                                                                                                             xml_files_21, 5)
# check shapes
print(check_annot(annot_vertical_images_21, annot_vertical_xmls_21, annot_horizontal_images_21, annot_horizontal_xmls_21))

In [None]:
# Block 0201 ("21"), test data: pass every vertically annotated image through
# the vertical storage helper and keep what the helper returns per image.
store_path = "Images_and_dense_maps_npy_test/vertical_annotated_images/Block_21"

im_shapes_annot_vertical_21 = [
    store_images_as_np_arrays_vertical(block_0201, image_file, store_path)
    for image_file in annot_vertical_images_21
]
print("Block 21 done!")

In [None]:
print(im_shapes_annot_vertical_21)

In [None]:
# Block 0201 ("21"): pass every not-annotated image through the vertical storage
# helper (target folder: store_path) and keep what the helper returns per image.
store_path = "Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_21"

im_shapes_notannot_vertical_21 = [
    store_images_as_np_arrays_vertical(block_0201, image_file, store_path)
    for image_file in not_annot_img_21
]
print("Block 21 done!")

In [None]:
print(im_shapes_notannot_vertical_21)

In [None]:
# Block 0201 ("21"): pass every horizontally annotated image through the
# horizontal storage helper and keep what the helper returns per image.
store_path = "Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_21"

im_shapes_annot_horizontal_21 = [
    store_images_as_np_arrays_horizontal(block_0201, image_file, store_path)
    for image_file in annot_horizontal_images_21
]
print("Block 21 done!")

In [None]:
print(im_shapes_annot_horizontal_21)

In [None]:
# xml_files_21

In [None]:
# Copy the xml files of block 0201 ("21") into one shared folder.
new_store_path = 'test_xml_files'
# shutil.copy treats a destination that does not exist as a *file* name, so
# without the folder every xml would overwrite one file called 'test_xml_files'.
os.makedirs(new_store_path, exist_ok=True)
for file in xml_files_21:
    joined_path = os.path.join(block_0201, file)
    # copy the file into the shared folder (same base name)
    shutil.copy(joined_path, new_store_path)

In [None]:
# get the outputs from this function
image_path = "Images_and_dense_maps_npy_test/vertical_annotated_images/Block_21/"
xml_path = 'test_xml_files'
save_density_path = image_path

In [None]:
# Base names (text before the first ".") of the entries in image_path, leaving
# out any whose base name ends in 'map'; sorted for a fixed order.
file_names_v = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path))
    if not stem.endswith('map')
)

In [None]:
# file_names_v

In [None]:
%%time
# Call get_density_maps_vertical for each vertically annotated file and spread
# its five return values over five parallel lists.
names_all_va_v, shapes_all_va_v, tasselCounts_all_va_v = [], [], []
tasselDensities_all_va_v, dense_maps_all_va_v = [], []
va_columns = (names_all_va_v, shapes_all_va_v, tasselCounts_all_va_v,
              tasselDensities_all_va_v, dense_maps_all_va_v)
for file_name in file_names_v:
    name_va, shape_va, tassels_va, tassel_dense_va, dense_map_va = get_density_maps_vertical(
        file_name, image_path, xml_path, save_density_path)
    va_row = (name_va, shape_va, tassels_va, tassel_dense_va, dense_map_va)
    for va_column, va_value in zip(va_columns, va_row):
        va_column.append(va_value)

In [None]:
# a few sanity checks
np.mean(tasselCounts_all_va_v == np.round(tasselDensities_all_va_v))

In [None]:
# One row per vertically annotated image: its name and its annotated tassel count.
True_tasselcounts_df_vertical_annot_test_21 = pd.DataFrame(
    list(zip(names_all_va_v, tasselCounts_all_va_v)),
    columns=['name', 'true_count'],
)

In [None]:
True_tasselcounts_df_vertical_annot_test_21.shape

In [None]:
True_tasselcounts_df_vertical_annot_test_21.head()

In [None]:
# get the outputs from this function
image_path_h = "Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_21"
xml_path_h = 'test_xml_files'
save_density_path_h = image_path_h

In [None]:
# Base names (text before the first ".") of the entries in image_path_h, leaving
# out any whose base name ends in 'map'; sorted for a fixed order.
file_names_h = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path_h))
    if not stem.endswith('map')
)

In [None]:
# file_names_h

In [None]:
%%time
# Call get_density_maps_horizontal for each horizontally annotated file and
# spread its five return values over five parallel lists.
names_all_h_v, shapes_all_h_v, tasselCounts_all_h_v = [], [], []
tasselDensities_all_h_v, dense_maps_all_h_v = [], []
h_columns = (names_all_h_v, shapes_all_h_v, tasselCounts_all_h_v,
             tasselDensities_all_h_v, dense_maps_all_h_v)
for file_h in file_names_h:
    name_h, shape_h, tassels_h, tassel_dense_h, dense_map_h = get_density_maps_horizontal(
        file_h, image_path_h, xml_path_h, save_density_path_h)
    h_row = (name_h, shape_h, tassels_h, tassel_dense_h, dense_map_h)
    for h_column, h_value in zip(h_columns, h_row):
        h_column.append(h_value)

In [None]:
# a few sanity checks
np.mean(tasselCounts_all_h_v == np.round(tasselDensities_all_h_v))

In [None]:
# One row per horizontally annotated image: its name and its annotated tassel count.
True_tasselcounts_df_horizontal_annot_test_21 = pd.DataFrame(
    list(zip(names_all_h_v, tasselCounts_all_h_v)),
    columns=['name', 'true_count'],
)

In [None]:
True_tasselcounts_df_horizontal_annot_test_21.shape

In [None]:
True_tasselcounts_df_horizontal_annot_test_21.head()

In [None]:
image_path_vn = 'Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_21'
save_density_path_vn = image_path_vn

In [None]:
# Base names (text before the first ".") of the entries in image_path_vn, leaving
# out any whose base name ends in 'map'; sorted for a fixed order.
file_names_vn = sorted(
    stem
    for stem in (entry.split(".")[0] for entry in os.listdir(image_path_vn))
    if not stem.endswith('map')
)

In [None]:
len(file_names_vn)

In [None]:
file_names_vn

In [None]:
%%time
# Density maps for the vertical NOT-annotated image files: call
# get_density_maps_not_annotated per file and spread its four return values
# over four parallel lists.
names_all_vn_v, shapes_all_vn_v = [], []
tasselDensities_all_vn_v, dense_maps_all_vn_v = [], []
vn_columns = (names_all_vn_v, shapes_all_vn_v, tasselDensities_all_vn_v, dense_maps_all_vn_v)
for file_vn in file_names_vn:
    name_vn, shape_vn, tassel_dense_vn, dense_map_vn = get_density_maps_not_annotated(
        file_vn, image_path_vn, save_density_path_vn)
    vn_row = (name_vn, shape_vn, tassel_dense_vn, dense_map_vn)
    for vn_column, vn_value in zip(vn_columns, vn_row):
        vn_column.append(vn_value)
    

In [None]:
# Not-annotated images have no xml count, so the density value returned by the
# helper is what goes into the 'true_count' column.
Vertical_not_annotated_df_test_21 = pd.DataFrame(
    list(zip(names_all_vn_v, tasselDensities_all_vn_v)),
    columns=['name', 'true_count'],
)

In [None]:
Vertical_not_annotated_df_test_21.head()

In [None]:
Vertical_not_annotated_df_test_21.shape

In [None]:
# Stack the three per-category count tables row-wise (original row indices are kept).
count_frames_21 = [
    Vertical_not_annotated_df_test_21,
    True_tasselcounts_df_vertical_annot_test_21,
    True_tasselcounts_df_horizontal_annot_test_21,
]
test_data_21 = pd.concat(count_frames_21, axis=0)

In [None]:
test_data_21.head()

In [None]:
test_data_21.shape

In [None]:
# save this dataframe
test_data_21.to_csv("True_tassel_counts/test_data/true_test_counts_blk_21.csv", index = False)

In [None]:
# move all the image and the xml files to a single location

va_images_dense = 'Images_and_dense_maps_npy_test/vertical_annotated_images/Block_21'
vn_images_dense = 'Images_and_dense_maps_npy_test/vertical_not_annotated_images/Block_21'
h_images_dense = 'Images_and_dense_maps_npy_test/horizontal_annotated_images/Block_21'

In [None]:
# Copy the three per-category folders into one combined Block_21 folder.
# dirs_exist_ok=True lets the second and third call reuse the existing target
# (files with the same name would be overwritten).
shutil.copytree(va_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_21', dirs_exist_ok=True)
shutil.copytree(vn_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_21', dirs_exist_ok=True)
shutil.copytree(h_images_dense, 'Preprocessed_test_data/all_img_density_files/Block_21', dirs_exist_ok=True)

In [None]:
test_files_path_21 = "Preprocessed_test_data/all_img_density_files/Block_21"

In [None]:
# os.listdir gives no ordering guarantee (the order was seen to be scrambled on
# HCC), so the listing is always sorted before use.
test_im_and_map_contents_21 = sorted(os.listdir(test_files_path_21))

In [None]:
# test_im_and_map_contents_21

In [None]:
len(test_im_and_map_contents_21)

In [None]:
# Keep the full file names whose base name (text before the first ".") does
# not end in 'map', i.e. drop the density-map files.
test_im_names_21 = [
    entry
    for entry in test_im_and_map_contents_21
    if not entry.partition(".")[0].endswith('map')
]

In [None]:
# how many? should be 32
len(test_im_names_21)

In [None]:
# test_im_names_21

In [None]:
%%time
# Build the sub-windows and their counts for every test image of block 0201
# ("21") and save both stacks as .npy files.

save_path = 'final_test_sub_windows_and_counts/Block_21'
# np.save does not create missing folders; make sure the target exists
os.makedirs(save_path, exist_ok=True)

im_names = []
for image in test_im_names_21:
    test_ims, test_maps, test_names = create_subwindows_and_counts(image, test_files_path_21, stride = 8, kernel_size = 32)
    test_im_array = np.array(test_ims)
    test_map_array = np.array(test_maps)
    print(image, test_im_array.shape, test_map_array.shape)
    # image stack
    np.save(save_path + '/' + 'test_ims_' + image, test_im_array)
    # count stack - must be the map/count array; the image stack used to be
    # written here by mistake, so the saved "counts" were just a copy of the images
    np.save(save_path + '/' + 'test_counts_' + image, test_map_array)
    im_names.append(test_names)

The rest of the test data preprocessing is in the next notebook.