In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import os
from scipy import ndimage
import xml.etree.ElementTree as ET
import cv2
import warnings
from skimage.transform import resize

In [None]:
# check for GPU
tf.config.list_physical_devices("GPU")

Functions to create non-overlapping sub-images and their respective density counts

In [None]:
# Steps

# 1. need to read the images
# 2. create the non-overlapping subwindows for the images
# 3. create the density maps
# 4. non-overlapping counts

# Let's create functions for all these, so that it would be easy to preprocess the data

In [None]:
# We will avoid the blocks on which the model was trained and validated - 101, 102, 203, 301, 204

In [None]:
# Let's do this first with block 0103

In [None]:
# Some of these maybe generic - but some may need to be written specifically to some images in the blocks

In [None]:
# Most images are vertical and horizontal, let's go with that

In [None]:
# separate the xml and image files
def separate_images_and_xml(location_to_contents):
    """Split the entries of a folder into image names and XML annotation names.

    An entry counts as an annotation file when the text after its last '.'
    is exactly 'xml'; every other entry is reported as an image, whatever
    its extension.

    Parameters
    ----------
    location_to_contents : str
        Folder whose entries are listed.

    Returns
    -------
    tuple of (list, list)
        ``(all_images, all_xmls)``, both in sorted order.
    """
    entries = sorted(os.listdir(location_to_contents))

    def _is_xml(entry):
        # same test as comparing the last '.'-separated piece with 'xml'
        return entry.rsplit(".", 1)[-1] == 'xml'

    all_xmls = [entry for entry in entries if _is_xml(entry)]
    all_images = [entry for entry in entries if not _is_xml(entry)]
    return all_images, all_xmls

In [None]:
# okay, what's next?
# may be separating the annotated and not annotated images?
def separate_annotated_and_not(image_list, xml_list):
    """Partition image file names by whether a matching XML file exists.

    An image is "annotated" when ``<stem>.xml`` is present in ``xml_list``,
    where ``<stem>`` is the part of the image name before its FIRST '.'.
    (The rest of the notebook derives names the same way, so file names
    containing extra dots are not supported.)

    Parameters
    ----------
    image_list : list of str
        Image file names.
    xml_list : list of str
        XML annotation file names.

    Returns
    -------
    tuple of (list, list)
        ``(annotated_images, not_annotated_images)``, each keeping the
        order of ``image_list``.
    """
    # set lookup keeps the loop linear instead of rescanning xml_list per image
    xml_names = set(xml_list)

    annotated_images = []
    not_annotated_images = []
    for item in image_list:
        if item.split('.')[0] + '.xml' in xml_names:
            annotated_images.append(item)
        else:
            not_annotated_images.append(item)

    return (annotated_images, not_annotated_images)

In [None]:
# separate the vertical and horizontal annotated images - This is the easiest way to go - this might be a problem in blocks 3 though, be careful.

def separate_annnot_vertical_horizontal(annotated_images, all_xml_files, n):
    """Split sorted image/XML name lists into a "vertical" head and "horizontal" tail.

    Both lists are sorted independently and cut at position ``n``: the first
    ``n`` names of each are returned as the vertical set, the remainder as
    the horizontal set. Images and XML files are paired purely by position,
    so the two lists are expected to describe the same files; the printed
    lengths are a quick way to spot a mismatch.

    The inputs are NOT modified (sorted copies are used), so the cell that
    calls this function can be re-run safely.

    Parameters
    ----------
    annotated_images : list of str
        Names of the annotated image files.
    all_xml_files : list of str
        Names of the XML annotation files.
    n : int
        Number of leading (sorted) entries to treat as vertical.

    Returns
    -------
    tuple of four lists
        ``(vertical_images, vertical_xmls, horizontal_images, horizontal_xmls)``.
    """
    sorted_images = sorted(annotated_images)
    sorted_xmls = sorted(all_xml_files)
    print(len(sorted_images), len(sorted_xmls))

    annot_vertical_images = sorted_images[:n]
    annot_vertical_xmls = sorted_xmls[:n]
    annot_horizontal_images = sorted_images[n:]
    annot_horizontal_xmls = sorted_xmls[n:]
    return (annot_vertical_images, annot_vertical_xmls, annot_horizontal_images, annot_horizontal_xmls)

In [None]:
# define a check function to make sure the image and xml file names match and are in order
def check_matching(im_list, xml_list):
    """Check that image names and XML names line up one-to-one once sorted.

    Each list is sorted (on a copy, the inputs are left untouched) and every
    name is reduced to the part before its first '.'. The two resulting
    lists must be equal element by element.

    Parameters
    ----------
    im_list : list of str
        Image file names.
    xml_list : list of str
        XML annotation file names.

    Returns
    -------
    bool
        True when the sorted stems are identical. The value is also printed,
        as before, so existing cells show the same output.
    """
    im_names = [im.split('.')[0] for im in sorted(im_list)]
    xml_names = [xml.split('.')[0] for xml in sorted(xml_list)]
    names_match = im_names == xml_names
    print(names_match)
    return names_match

##### Note here that since our subwindows need to make sense sequentially, it does not make sense to include both vertical and horizontal images. Let's only pick the horizontal images, and let's do the forthcoming manipulations only for the horizontal images

In [None]:
# create density maps for the horizontally annotated images
def get_density_maps_horizontal(file_name, im_xml_path, show_plots = True):
    """Build the Gaussian density map of one annotated image.

    Reads ``<file_name>.xml`` and ``<file_name>.jpeg`` from ``im_xml_path``,
    turns every bounding box into its centre point, places one unit of mass
    per box on an empty map of the image's height and width, and smooths
    that map with a 50x50 Gaussian kernel (sigma 5).

    Parameters
    ----------
    file_name : str
        Name shared by the image and the XML file, without extension.
    im_xml_path : str
        Folder that holds both files.
    show_plots : bool, default True
        When True, four figures are shown: the image with boxes, the image
        with centre points, the dot map and the density map. Pass False to
        skip all drawing when processing many images.

    Returns
    -------
    tuple
        ``(file_name, image_shape, no_of_tassels, img_sum, convolution)``
        where ``no_of_tassels`` is the number of boxes in the XML file,
        ``convolution`` is the 2-D density map and ``img_sum`` is its sum.

    Raises
    ------
    ValueError
        If the number of coordinates read from the XML file is not a
        multiple of 4, or a coordinate is not an integer.
    """
    xml_file_path = os.path.join(im_xml_path, file_name + '.xml')

    # every element three levels below the root is read as one integer coordinate
    root = ET.parse(xml_file_path).getroot()
    coords = [int(j.text) for child in root for i in child for j in i]
    if len(coords) % 4 != 0:
        raise ValueError(
            "expected 4 coordinates per bounding box in " + xml_file_path
            + ", found " + str(len(coords)) + " values"
        )

    # chunk the points into sets of 4 - these are the coordinates of the bounding boxes
    points_tupples = [coords[i:i + 4] for i in range(0, len(coords), 4)]
    no_of_tassels = len(points_tupples)
    # (n_boxes, 4) array; stays well-formed (0, 4) when the file has no boxes
    boxes = np.array(points_tupples, dtype = np.int64).reshape(-1, 4)

    image_name = file_name + '.jpeg'
    print(image_name)
    imge_file_path = os.path.join(im_xml_path, image_name)
    # np.array(...) gives a private, writable copy that cv2 can draw on
    read_image = np.array(plt.imread(imge_file_path))
    read_image_shape = read_image.shape
    img_height, img_width = read_image_shape[0], read_image_shape[1]

    # box centres: columns 0/2 are the x values, columns 1/3 the y values
    mid_x = np.rint(0.5 * (boxes[:, 0] + boxes[:, 2])).astype(int)
    mid_y = np.rint(0.5 * (boxes[:, 1] + boxes[:, 3])).astype(int)
    # keep every centre inside the image; bounds come from the image itself,
    # so a coordinate equal to the width/height (or a negative one) is safe
    mid_x = np.clip(mid_x, 0, img_width - 1)
    mid_y = np.clip(mid_y, 0, img_height - 1)

    if show_plots:
        # plot the bounding boxes on the image
        image_with_boxes = read_image.copy()
        for x_first, y_first, x_second, y_second in boxes.tolist():
            cv2.rectangle(image_with_boxes, (x_first, y_first), (x_second, y_second), color = (255,0,0), thickness = 2)
        plt.imshow(image_with_boxes)
        plt.show()

        # plot the mid points on the image
        image_with_mids = read_image.copy()
        for x_mid, y_mid in zip(mid_x.tolist(), mid_y.tolist()):
            cv2.circle(image_with_mids, (x_mid, y_mid), radius=5, color=(255, 0, 0), thickness=-1)
        plt.imshow(image_with_mids)
        plt.show()

    # dot map: accumulate so that boxes sharing a centre pixel are all counted
    np_image = np.zeros((img_height, img_width))
    np.add.at(np_image, (mid_y, mid_x), 1)
    if show_plots:
        plt.imshow(np_image, cmap = "Greys")
        plt.show()

    # 2-D Gaussian kernel as the outer product of the 1-D kernel with itself
    one_d_kerenel = cv2.getGaussianKernel(50,5)
    two_d_kernel = np.multiply(one_d_kerenel.T, one_d_kerenel)

    # do the convolution
    convolution = ndimage.convolve(np_image, two_d_kernel)

    if show_plots:
        # plot the density map
        plt.imshow(convolution, cmap = "Greys")
        plt.show()

    # total mass of the density map - should be close to no_of_tassels
    img_sum = np.sum(convolution)

    return(file_name, read_image_shape, no_of_tassels, img_sum, convolution)

In [None]:
def create_subwindows_and_counts(im_folder_loc, image_name, conv_map, save_folder_name, stride = 300, kernel_size = 300):
    """Cut an image and its density map into windows and save each as .npy.

    The image ``<image_name>.jpeg`` is read from ``im_folder_loc`` and
    scanned row by row with step ``stride``. For every window position:

    * the image patch (at most ``kernel_size`` x ``kernel_size``; smaller at
      the right/bottom edges) is resized to ``(kernel_size, kernel_size, 3)``
      and saved as ``<image_name>_<k>.npy``;
    * the matching patch of ``conv_map`` is saved unresized as
      ``<image_name>_<k>_density.npy`` and its sum is recorded.

    ``k`` counts windows from 0 in scan order.

    Parameters
    ----------
    im_folder_loc : str
        Folder containing the image.
    image_name : str
        Image name without extension.
    conv_map : numpy.ndarray
        Density map with the same height and width as the image.
    save_folder_name : str
        Output folder; created if it does not exist.
    stride : int, default 300
        Step between window origins, in pixels.
    kernel_size : int, default 300
        Window side length, in pixels.

    Returns
    -------
    tuple of five lists
        ``(sub_image_names, density_names, density_sums,
        sub_image_shapes, sub_count_shapes)``, one entry per window.

    Raises
    ------
    ValueError
        If ``conv_map`` does not have the image's height and width.
    """
    joined_im_path = os.path.join(im_folder_loc, image_name + '.jpeg')
    # load the image
    loaded_im_file = plt.imread(joined_im_path)

    img_height = loaded_im_file.shape[0]
    img_width = loaded_im_file.shape[1]

    # both arrays are sliced with the same indices, so they must be the same size
    if tuple(np.shape(conv_map)[:2]) != (img_height, img_width):
        raise ValueError(
            "conv_map shape " + str(np.shape(conv_map))
            + " does not match image " + image_name + ".jpeg of shape "
            + str(loaded_im_file.shape)
        )

    # np.save does not create missing folders
    if save_folder_name:
        os.makedirs(save_folder_name, exist_ok = True)

    density_sums = []
    catch_sub_image_name = []
    catch_dense_name = []
    sub_image_shapes = []
    sub_count_shapes = []

    window_origins = [
        (i, j)
        for i in range(0, img_height, stride)
        for j in range(0, img_width, stride)
    ]
    for counter, (i, j) in enumerate(window_origins):
        sub_window = loaded_im_file[i: i + kernel_size, j : j + kernel_size,:]
        # edge windows are smaller than kernel_size; bring every window to one shape
        sub_window = resize(sub_window, (kernel_size, kernel_size, 3))
        sub_image_shapes.append(sub_window.shape)
        sub_window_name = image_name + '_' + str(counter) + '.npy'
        np.save(os.path.join(save_folder_name, sub_window_name), sub_window)
        catch_sub_image_name.append(sub_window_name)

        # the density patch is NOT resized, so its sum stays a tassel count
        density = conv_map[i: i + kernel_size, j : j + kernel_size]
        sub_count_shapes.append(density.shape)
        density_sums.append(np.sum(density))
        density_name = image_name + '_' + str(counter) + '_density.npy'
        catch_dense_name.append(density_name)
        np.save(os.path.join(save_folder_name, density_name), density)

    return(catch_sub_image_name, catch_dense_name, density_sums, sub_image_shapes, sub_count_shapes)

Block 0103

In [None]:
# folder containing the images and XML annotation files for this block
path_to_contents = "../../S_lab_TasselNet/Block_3_TN/Block_3_images_and_xml"

In [None]:
all_ims_1, all_xmls_1 = separate_images_and_xml(path_to_contents)

In [None]:
# the function works, check the lengths, the images will be more than the xml files
len(all_xmls_1), len(all_ims_1)

In [None]:
# the function works!
blk1_annot, blk1_not_annot = separate_annotated_and_not(all_ims_1, all_xmls_1)

In [None]:
# blk1_annot

In [None]:
# blk1_not_annot

In [None]:
# split the annotated files of this block: the first 6 (after sorting) are treated as vertical, the rest as horizontal
a_vi, a_vx, a_hi, a_hx = separate_annnot_vertical_horizontal(blk1_annot, all_xmls_1, 6)

In [None]:
# The function is working
check_matching(a_vi, a_vx), check_matching(a_hi, a_hx)

In [None]:
# store the images as np arrays and create the density maps - I think the images are stored here so that they can be later used to create the subwindows
# may need to write out separate functions for special cases of images
# let's write out the generic ones first, and later deal with these

In [None]:
all_horizontal_image_names = [file.split(".")[0] for file in a_hi]
all_horizontal_image_names.sort()

In [None]:
print(all_horizontal_image_names)

In [None]:
len(all_horizontal_image_names)

In [None]:
%%time
catch_all = []
for i in all_horizontal_image_names:
    contents_i = get_density_maps_horizontal(i, path_to_contents)
    catch_all.append(contents_i)

In [None]:
%%time
# just make sure that for each images the real counts and the convolved counts match
blk_3_img_names = []
blk_3_img_shapes = []
blk_3_real_counts = []
blk_3_convolved_counts = []
blk_3_convolutions = []
for i in range(len(catch_all)):
    blk_3_img_names.append(catch_all[i][0])
    blk_3_img_shapes.append(catch_all[i][1])
    blk_3_real_counts.append(catch_all[i][2])
    blk_3_convolved_counts.append(catch_all[i][3])
    blk_3_convolutions.append(catch_all[i][4])


In [None]:
print(blk_3_img_shapes)

In [None]:
np.round(blk_3_convolved_counts,0)

In [None]:
print(blk_3_real_counts)

In [None]:
np.mean(blk_3_real_counts == np.round(blk_3_convolved_counts, 0))

In [None]:
print(blk_3_img_names)

In [None]:
save_subs_path = 'all_preprocessed_data/Block_0103/sub_images_and_counts'

In [None]:
%%time
catch_everything = []
for item in range(len(blk_3_img_names)):
    all_of_it = create_subwindows_and_counts(path_to_contents, blk_3_img_names[item], blk_3_convolutions[item], save_subs_path, stride = 300, kernel_size = 300)
    catch_everything.append(all_of_it)

In [None]:
# how many files in here - that divided by 2*20 would be the number of TS sequences that we will have
how_many = os.listdir(save_subs_path)
len(how_many)/(2*20)

In [None]:
# we will create and save dataframes with the subwindow names and the subcounts image-wise for now - later look into dataframe combinations
counter = 0
for i in range(len(catch_everything)):
    # create a dataframe
    data_frame = pd.DataFrame(zip(catch_everything[i][0], catch_everything[i][2]), columns = ['subwindow_name', 'sub_counts'])
    df_name = 'subcount_df_' + str(counter) + '.csv'
    df_path = 'all_preprocessed_data/Block_0103/sub_count_dfs/' + df_name
    # save the dataframe
    data_frame.to_csv(df_path, index = False)
    counter += 1 

In [None]:
# What more to verify here? - check the shapes of the subwindows and sub count maps
print(catch_everything[0][3])

In [None]:
print(catch_everything[0][4])

We need the data preprocessed for all the other blocks as well. Let's do that.

Block 0104

In [None]:
# folder containing the images and XML annotation files for this block
path_to_contents_blk_4 = "../../S_lab_TasselNet/Block_4_TN/Block_4_images_and_xml"

In [None]:
all_ims_4, all_xmls_4 = separate_images_and_xml(path_to_contents_blk_4)

In [None]:
# the function works, check the lengths, the images will be more than the xml files
len(all_ims_4), len(all_xmls_4)

In [None]:
# all_ims_4

In [None]:
# all_xmls_4

In [None]:
# the function works!
blk4_annot, blk4_not_annot = separate_annotated_and_not(all_ims_4, all_xmls_4)

In [None]:
# blk4_annot

In [None]:
# blk4_not_annot

In [None]:
# split the annotated files of this block: the first 6 (after sorting) are treated as vertical, the rest as horizontal
a_vi_4, a_vx_4, a_hi_4, a_hx_4 = separate_annnot_vertical_horizontal(blk4_annot, all_xmls_4, 6)

In [None]:
# The function is working
check_matching(a_vi_4, a_vx_4), check_matching(a_hi_4, a_hx_4)

In [None]:
len(a_vi_4), len(a_vx_4), len(a_hi_4), len(a_hx_4)

In [None]:
# store the images as np arrays and create the density maps - I think the images are stored here so that they can be later used to create the subwindows
# may need to write out separate functions for special cases of images
# let's write out the generic ones first, and later deal with these

In [None]:
all_horizontal_image_names_4 = [file.split(".")[0] for file in a_hi_4]
all_horizontal_image_names_4.sort()

In [None]:
print(all_horizontal_image_names_4)

In [None]:
len(all_horizontal_image_names_4)

In [None]:
%%time
catch_all_4 = []
for i in all_horizontal_image_names_4:
    contents_i = get_density_maps_horizontal(i, path_to_contents_blk_4)
    catch_all_4.append(contents_i)

In [None]:
%%time
# just make sure that for each images the real counts and the convolved counts match
blk_4_img_names = []
blk_4_img_shapes = []
blk_4_real_counts = []
blk_4_convolved_counts = []
blk_4_convolutions = []
for i in range(len(catch_all_4)):
    blk_4_img_names.append(catch_all_4[i][0])
    blk_4_img_shapes.append(catch_all_4[i][1])
    blk_4_real_counts.append(catch_all_4[i][2])
    blk_4_convolved_counts.append(catch_all_4[i][3])
    blk_4_convolutions.append(catch_all_4[i][4])


In [None]:
print(blk_4_img_shapes)

In [None]:
np.round(blk_4_convolved_counts, 0)

In [None]:
print(blk_4_real_counts)

In [None]:
np.mean(blk_4_real_counts == np.round(blk_4_convolved_counts, 0))

In [None]:
print(blk_4_img_names)

In [None]:
save_subs_path_4 = 'all_preprocessed_data/Block_0104/sub_images_and_counts'

In [None]:
%%time
catch_everything_4 = []
for item in range(len(blk_4_img_names)):
    all_of_it_4 = create_subwindows_and_counts(path_to_contents_blk_4, blk_4_img_names[item], blk_4_convolutions[item], save_subs_path_4, stride = 300, kernel_size = 300)
    catch_everything_4.append(all_of_it_4)

In [None]:
# how many files in here - that divided by 2*20 would be the number of TS sequences that we will have
how_many = os.listdir(save_subs_path_4)
len(how_many)/(2*20)

In [None]:
%%time
# we will create and save dataframes with the subwindow names and the subcounts image-wise for now - later look into dataframe combinations
counter = 0
for i in range(len(catch_everything_4)):
    # create a dataframe
    data_frame = pd.DataFrame(zip(catch_everything_4[i][0], catch_everything_4[i][2]), columns = ['subwindow_name', 'sub_counts'])
    df_name = 'subcount_df_' + str(counter) + '.csv'
    df_path = 'all_preprocessed_data/Block_0104/sub_count_dfs/' + df_name
    # save the dataframe
    data_frame.to_csv(df_path, index = False)
    counter += 1 

In [None]:
len(catch_everything_4)

Block 0105

In [None]:
# folder containing the images and XML annotation files for this block
path_to_contents_blk_5 = "../../S_lab_TasselNet/Block_5_TN/Block_5_images_and_xml"

In [None]:
all_ims_5, all_xmls_5 = separate_images_and_xml(path_to_contents_blk_5)

In [None]:
# the function works, check the lengths, the images will be more than the xml files
len(all_ims_5), len(all_xmls_5)

In [None]:
# all_ims_5

In [None]:
# all_xmls_5

In [None]:
# the function works!
blk5_annot, blk5_not_annot = separate_annotated_and_not(all_ims_5, all_xmls_5)

In [None]:
# blk5_annot

In [None]:
# blk5_not_annot

In [None]:
# split the annotated files of this block: the first 6 (after sorting) are treated as vertical, the rest as horizontal
a_vi_5, a_vx_5, a_hi_5, a_hx_5 = separate_annnot_vertical_horizontal(blk5_annot, all_xmls_5, 6)

In [None]:
# The function is working
check_matching(a_vi_5, a_vx_5), check_matching(a_hi_5, a_hx_5)

In [None]:
len(a_vi_5), len(a_vx_5), len(a_hi_5), len(a_hx_5)

In [None]:
all_horizontal_image_names_5 = [file.split(".")[0] for file in a_hi_5]
all_horizontal_image_names_5.sort()

In [None]:
print(all_horizontal_image_names_5)

In [None]:
len(all_horizontal_image_names_5)

In [None]:
%%time
catch_all_5 = []
for i in all_horizontal_image_names_5:
    contents_i = get_density_maps_horizontal(i, path_to_contents_blk_5)
    catch_all_5.append(contents_i)

In [None]:
%%time
# just make sure that for each images the real counts and the convolved counts match
blk_5_img_names = []
blk_5_img_shapes = []
blk_5_real_counts = []
blk_5_convolved_counts = []
blk_5_convolutions = []
for i in range(len(catch_all_5)):
    blk_5_img_names.append(catch_all_5[i][0])
    blk_5_img_shapes.append(catch_all_5[i][1])
    blk_5_real_counts.append(catch_all_5[i][2])
    blk_5_convolved_counts.append(catch_all_5[i][3])
    blk_5_convolutions.append(catch_all_5[i][4])


In [None]:
print(blk_5_img_shapes)

In [None]:
np.round(blk_5_convolved_counts, 0)

In [None]:
print(blk_5_real_counts)

In [None]:
# fraction of Block 0105 images whose rounded density-map sum equals the annotated tassel count
np.mean(blk_5_real_counts == np.round(blk_5_convolved_counts, 0))

In [None]:
print(blk_5_img_names)

In [None]:
save_subs_path_5 = 'all_preprocessed_data/Block_0105/sub_images_and_counts'

In [None]:
%%time
catch_everything_5 = []
for item in range(len(blk_5_img_names)):
    all_of_it_5 = create_subwindows_and_counts(path_to_contents_blk_5, blk_5_img_names[item], blk_5_convolutions[item], save_subs_path_5, stride = 300, kernel_size = 300)
    catch_everything_5.append(all_of_it_5)

In [None]:
# how many files in here - that divided by 2*20 would be the number of TS sequences that we will have
how_many = os.listdir(save_subs_path_5)
len(how_many)/(2*20)

In [None]:
%%time
# we will create and save dataframes with the subwindow names and the subcounts image-wise for now - later look into dataframe combinations
counter = 0
for i in range(len(catch_everything_5)):
    # create a dataframe
    data_frame = pd.DataFrame(zip(catch_everything_5[i][0], catch_everything_5[i][2]), columns = ['subwindow_name', 'sub_counts'])
    df_name = 'subcount_df_' + str(counter) + '.csv'
    df_path = 'all_preprocessed_data/Block_0105/sub_count_dfs/' + df_name
    # save the dataframe
    data_frame.to_csv(df_path, index = False)
    counter += 1 

In [None]:
len(catch_everything_5)

Block 0106

In [None]:
# folder containing the images and XML annotation files for this block
path_to_contents_blk_6 = "../../S_lab_TasselNet/Block_6_TN/Block_6_images_and_xml"

In [None]:
all_ims_6, all_xmls_6 = separate_images_and_xml(path_to_contents_blk_6)

In [None]:
# the function works, check the lengths, the images will be more than the xml files
len(all_ims_6), len(all_xmls_6)

In [None]:
# all_ims_6

In [None]:
# all_xmls_6

In [None]:
# the function works!
blk6_annot, blk6_not_annot = separate_annotated_and_not(all_ims_6, all_xmls_6)

In [None]:
# blk6_annot

In [None]:
# blk6_not_annot

In [None]:
# split the annotated files of this block: the first 6 (after sorting) are treated as vertical, the rest as horizontal
a_vi_6, a_vx_6, a_hi_6, a_hx_6 = separate_annnot_vertical_horizontal(blk6_annot, all_xmls_6, 6)

In [None]:
# The function is working
check_matching(a_vi_6, a_vx_6), check_matching(a_hi_6, a_hx_6)

In [None]:
len(a_vi_6), len(a_vx_6), len(a_hi_6), len(a_hx_6)

In [None]:
all_horizontal_image_names_6 = [file.split(".")[0] for file in a_hi_6]
all_horizontal_image_names_6.sort()

In [None]:
print(all_horizontal_image_names_6)

In [None]:
len(all_horizontal_image_names_6)

In [None]:
%%time
catch_all_6 = []
for i in all_horizontal_image_names_6:
    contents_i = get_density_maps_horizontal(i, path_to_contents_blk_6)
    catch_all_6.append(contents_i)

In [None]:
%%time
# just make sure that for each images the real counts and the convolved counts match
blk_6_img_names = []
blk_6_img_shapes = []
blk_6_real_counts = []
blk_6_convolved_counts = []
blk_6_convolutions = []
for i in range(len(catch_all_6)):
    blk_6_img_names.append(catch_all_6[i][0])
    blk_6_img_shapes.append(catch_all_6[i][1])
    blk_6_real_counts.append(catch_all_6[i][2])
    blk_6_convolved_counts.append(catch_all_6[i][3])
    blk_6_convolutions.append(catch_all_6[i][4])


In [None]:
print(blk_6_img_shapes)

In [None]:
np.round(blk_6_convolved_counts, 0)

In [None]:
print(blk_6_real_counts)

In [None]:
np.mean(blk_6_real_counts == np.round(blk_6_convolved_counts, 0))

In [None]:
print(blk_6_img_names)

In [None]:
save_subs_path_6 = 'all_preprocessed_data/Block_0106/sub_images_and_counts'

In [None]:
%%time
catch_everything_6 = []
for item in range(len(blk_6_img_names)):
    all_of_it_6 = create_subwindows_and_counts(path_to_contents_blk_6, blk_6_img_names[item], blk_6_convolutions[item], save_subs_path_6, stride = 300, kernel_size = 300)
    catch_everything_6.append(all_of_it_6)

In [None]:
# how many files in here - that divided by 2*20 would be the number of TS sequences that we will have
how_many = os.listdir(save_subs_path_6)
len(how_many)/(2*20)

In [None]:
%%time
# we will create and save dataframes with the subwindow names and the subcounts image-wise for now - later look into dataframe combinations
counter = 0
for i in range(len(catch_everything_6)):
    # create a dataframe
    data_frame = pd.DataFrame(zip(catch_everything_6[i][0], catch_everything_6[i][2]), columns = ['subwindow_name', 'sub_counts'])
    df_name = 'subcount_df_' + str(counter) + '.csv'
    df_path = 'all_preprocessed_data/Block_0106/sub_count_dfs/' + df_name
    # save the dataframe
    data_frame.to_csv(df_path, index = False)
    counter += 1 

In [None]:
len(catch_everything_6)

Block 0201

In [None]:
# folder containing the images and XML annotation files for this block
path_to_contents_blk_7 = "../../S_lab_TasselNet/Block_7_TN/Block_7_images_and_xml"

In [None]:
all_ims_7, all_xmls_7 = separate_images_and_xml(path_to_contents_blk_7)

In [None]:
# the function works, check the lengths, the images will be more than the xml files
len(all_ims_7), len(all_xmls_7)

In [None]:
# all_ims_7

In [None]:
# all_xmls_7

In [None]:
# the function works!
blk7_annot, blk7_not_annot = separate_annotated_and_not(all_ims_7, all_xmls_7)

In [None]:
# blk7_annot

In [None]:
# blk7_not_annot

In [None]:
# split the annotated files of this block: the first 5 (after sorting) are treated as vertical, the rest as horizontal
a_vi_7, a_vx_7, a_hi_7, a_hx_7 = separate_annnot_vertical_horizontal(blk7_annot, all_xmls_7, 5)

In [None]:
len(a_vi_7), len(a_vx_7), len(a_hi_7), len(a_hx_7)

In [None]:
# The function is working
check_matching(a_vi_7, a_vx_7), check_matching(a_hi_7, a_hx_7)

In [None]:
all_horizontal_image_names_7 = [file.split(".")[0] for file in a_hi_7]
all_horizontal_image_names_7.sort()

In [None]:
print(all_horizontal_image_names_7)

In [None]:
len(all_horizontal_image_names_7)

In [None]:
%%time
catch_all_7 = []
for i in all_horizontal_image_names_7:
    contents_i = get_density_maps_horizontal(i, path_to_contents_blk_7)
    catch_all_7.append(contents_i)

In [None]:
%%time
# just make sure that for each images the real counts and the convolved counts match
blk_7_img_names = []
blk_7_img_shapes = []
blk_7_real_counts = []
blk_7_convolved_counts = []
blk_7_convolutions = []
for i in range(len(catch_all_7)):
    blk_7_img_names.append(catch_all_7[i][0])
    blk_7_img_shapes.append(catch_all_7[i][1])
    blk_7_real_counts.append(catch_all_7[i][2])
    blk_7_convolved_counts.append(catch_all_7[i][3])
    blk_7_convolutions.append(catch_all_7[i][4])


In [None]:
print(blk_7_img_shapes)

In [None]:
np.round(blk_7_convolved_counts, 0)

In [None]:
print(blk_7_real_counts)

In [None]:
np.mean(blk_7_real_counts == np.round(blk_7_convolved_counts, 0))

In [None]:
print(blk_7_img_names)

In [None]:
save_subs_path_7 = 'all_preprocessed_data/Block_0201/sub_images_and_counts'

In [None]:
%%time
catch_everything_7 = []
for item in range(len(blk_7_img_names)):
    all_of_it_7 = create_subwindows_and_counts(path_to_contents_blk_7, blk_7_img_names[item], blk_7_convolutions[item], save_subs_path_7, stride = 300, kernel_size = 300)
    catch_everything_7.append(all_of_it_7)

In [None]:
# how many files in here - that divided by 2*20 would be the number of TS sequences that we will have
how_many = os.listdir(save_subs_path_7)
len(how_many)/(2*20)

In [None]:
%%time
# we will create and save dataframes with the subwindow names and the subcounts image-wise for now - later look into dataframe combinations
counter = 0
for i in range(len(catch_everything_7)):
    # create a dataframe
    data_frame = pd.DataFrame(zip(catch_everything_7[i][0], catch_everything_7[i][2]), columns = ['subwindow_name', 'sub_counts'])
    df_name = 'subcount_df_' + str(counter) + '.csv'
    df_path = 'all_preprocessed_data/Block_0201/sub_count_dfs/' + df_name
    # save the dataframe
    data_frame.to_csv(df_path, index = False)
    counter += 1 

In [None]:
len(catch_everything_7)

Block 0202

In [None]:
# folder containing the images and XML annotation files for this block
path_to_contents_blk_8 = "../../S_lab_TasselNet/Block_8_TN/Block_8_images_and_xml"

In [None]:
all_ims_8, all_xmls_8 = separate_images_and_xml(path_to_contents_blk_8)

In [None]:
# the function works, check the lengths, the images will be more than the xml files
len(all_ims_8), len(all_xmls_8)

In [None]:
# all_ims_8

In [None]:
# all_xmls_8

In [None]:
# the function works!
blk8_annot, blk8_not_annot = separate_annotated_and_not(all_ims_8, all_xmls_8)

In [None]:
# blk8_annot

In [None]:
blk8_not_annot

In [None]:
# split the annotated files of this block: the first 8 (after sorting) are treated as vertical, the rest as horizontal
a_vi_8, a_vx_8, a_hi_8, a_hx_8 = separate_annnot_vertical_horizontal(blk8_annot, all_xmls_8, 8)

In [None]:
len(a_vi_8), len(a_vx_8), len(a_hi_8), len(a_hx_8)

In [None]:
# The function is working
check_matching(a_vi_8, a_vx_8), check_matching(a_hi_8, a_hx_8)

In [None]:
all_horizontal_image_names_8 = [file.split(".")[0] for file in a_hi_8]
all_horizontal_image_names_8.sort()

In [None]:
print(all_horizontal_image_names_8)

In [None]:
len(all_horizontal_image_names_8)

In [None]:
%%time
catch_all_8 = []
for i in all_horizontal_image_names_8:
    contents_i = get_density_maps_horizontal(i, path_to_contents_blk_8)
    catch_all_8.append(contents_i)

In [None]:
%%time
# just make sure that for each images the real counts and the convolved counts match
blk_8_img_names = []
blk_8_img_shapes = []
blk_8_real_counts = []
blk_8_convolved_counts = []
blk_8_convolutions = []
for i in range(len(catch_all_8)):
    blk_8_img_names.append(catch_all_8[i][0])
    blk_8_img_shapes.append(catch_all_8[i][1])
    blk_8_real_counts.append(catch_all_8[i][2])
    blk_8_convolved_counts.append(catch_all_8[i][3])
    blk_8_convolutions.append(catch_all_8[i][4])


In [None]:
print(blk_8_img_shapes)

In [None]:
np.round(blk_8_convolved_counts, 0)

In [None]:
print(blk_8_real_counts)

In [None]:
np.mean(blk_8_real_counts == np.round(blk_8_convolved_counts, 0))

In [None]:
print(blk_8_img_names)

In [None]:
save_subs_path_8 = 'all_preprocessed_data/Block_0202/sub_images_and_counts'

In [None]:
%%time
catch_everything_8 = []
for item in range(len(blk_8_img_names)):
    all_of_it_8 = create_subwindows_and_counts(path_to_contents_blk_8, blk_8_img_names[item], blk_8_convolutions[item], save_subs_path_8, stride = 300, kernel_size = 300)
    catch_everything_8.append(all_of_it_8)

In [None]:
# how many files in here - that divided by 2*20 would be the number of TS sequences that we will have
how_many = os.listdir(save_subs_path_8)
len(how_many)/(2*20)

In [None]:
%%time
# we will create and save dataframes with the subwindow names and the subcounts image-wise for now - later look into dataframe combinations
counter = 0
for i in range(len(catch_everything_8)):
    # create a dataframe
    data_frame = pd.DataFrame(zip(catch_everything_8[i][0], catch_everything_8[i][2]), columns = ['subwindow_name', 'sub_counts'])
    df_name = 'subcount_df_' + str(counter) + '.csv'
    df_path = 'all_preprocessed_data/Block_0202/sub_count_dfs/' + df_name
    # save the dataframe
    data_frame.to_csv(df_path, index = False)
    counter += 1 

In [None]:
len(catch_everything_8)

Block 0205

In [None]:
# folder containing the images and XML annotation files for this block
path_to_contents_blk_11 = "../../S_lab_TasselNet/Block_11_TN/Block_11_images_and_xml"

In [None]:
all_ims_11, all_xmls_11 = separate_images_and_xml(path_to_contents_blk_11)

In [None]:
# the function works, check the lengths, the images will be more than the xml files
len(all_ims_11), len(all_xmls_11)

In [None]:
# all_ims_11

In [None]:
# all_xmls_11

In [None]:
blk11_annot, blk11_not_annot = separate_annotated_and_not(all_ims_11, all_xmls_11)

In [None]:
# blk11_annot

In [None]:
blk11_not_annot

In [None]:
# split the annotated files of this block: the first 6 (after sorting) are treated as vertical, the rest as horizontal
a_vi_11, a_vx_11, a_hi_11, a_hx_11 = separate_annnot_vertical_horizontal(blk11_annot, all_xmls_11, 6)

In [None]:
len(a_vi_11), len(a_vx_11), len(a_hi_11), len(a_hx_11)

In [None]:
# The function is working
check_matching(a_vi_11, a_vx_11), check_matching(a_hi_11, a_hx_11)

In [None]:
all_horizontal_image_names_11 = [file.split(".")[0] for file in a_hi_11]
all_horizontal_image_names_11.sort()

In [None]:
print(all_horizontal_image_names_11)

In [None]:
len(all_horizontal_image_names_11)

In [None]:
%%time
catch_all_11 = []
for i in all_horizontal_image_names_11:
    contents_i = get_density_maps_horizontal(i, path_to_contents_blk_11)
    catch_all_11.append(contents_i)

In [None]:
%%time
# just make sure that for each images the real counts and the convolved counts match
blk_11_img_names = []
blk_11_img_shapes = []
blk_11_real_counts = []
blk_11_convolved_counts = []
blk_11_convolutions = []
for i in range(len(catch_all_11)):
    blk_11_img_names.append(catch_all_11[i][0])
    blk_11_img_shapes.append(catch_all_11[i][1])
    blk_11_real_counts.append(catch_all_11[i][2])
    blk_11_convolved_counts.append(catch_all_11[i][3])
    blk_11_convolutions.append(catch_all_11[i][4])


In [None]:
print(blk_11_img_shapes)

In [None]:
np.round(blk_11_convolved_counts, 0)

In [None]:
print(blk_11_real_counts)

In [None]:
np.mean(blk_11_real_counts == np.round(blk_11_convolved_counts, 0))

In [None]:
print(blk_11_img_names)

In [None]:
save_subs_path_11 = 'all_preprocessed_data/Block_0205/sub_images_and_counts'

In [None]:
%%time
catch_everything_11 = []
for item in range(len(blk_11_img_names)):
    all_of_it_11 = create_subwindows_and_counts(path_to_contents_blk_11, blk_11_img_names[item], blk_11_convolutions[item], save_subs_path_11, stride = 300, kernel_size = 300)
    catch_everything_11.append(all_of_it_11)

In [None]:
# how many files in here - that divided by 2*20 would be the number of TS sequences that we will have
how_many = os.listdir(save_subs_path_11)
len(how_many)/(2*20)

In [None]:
%%time
# we will create and save dataframes with the subwindow names and the subcounts image-wise for now - later look into dataframe combinations
counter = 0
for i in range(len(catch_everything_11)):
    # create a dataframe
    data_frame = pd.DataFrame(zip(catch_everything_11[i][0], catch_everything_11[i][2]), columns = ['subwindow_name', 'sub_counts'])
    df_name = 'subcount_df_' + str(counter) + '.csv'
    df_path = 'all_preprocessed_data/Block_0205/sub_count_dfs/' + df_name
    # save the dataframe
    data_frame.to_csv(df_path, index = False)
    counter += 1 

In [None]:
len(catch_everything_11)

Block 0206

In [None]:
# folder containing the images and XML annotation files for this block
path_to_contents_blk_12 = "../../S_lab_TasselNet/Block_12_TN/Block_12_images_and_xml"

In [None]:
all_ims_12, all_xmls_12 = separate_images_and_xml(path_to_contents_blk_12)

In [None]:
# the function works, check the lengths, the images will be more than the xml files
len(all_ims_12), len(all_xmls_12)

In [None]:
# all_ims_12

In [None]:
# all_xmls_12

In [None]:
blk12_annot, blk12_not_annot = separate_annotated_and_not(all_ims_12, all_xmls_12)

In [None]:
# blk12_annot

In [None]:
# blk12_not_annot

In [None]:
# split the annotated Block 0206 images and their XML files into vertical and horizontal groups
# NOTE(review): the meaning of the third argument (7) is defined inside separate_annnot_vertical_horizontal - confirm it is right for this block
a_vi_12, a_vx_12, a_hi_12, a_hx_12 = separate_annnot_vertical_horizontal(blk12_annot, all_xmls_12, 7)

In [None]:
len(a_vi_12), len(a_vx_12), len(a_hi_12), len(a_hx_12)

In [None]:
# The function is working
check_matching(a_vi_12, a_vx_12), check_matching(a_hi_12, a_hx_12)

In [None]:
# horizontal image names (the text before the first "."), in sorted order
all_horizontal_image_names_12 = sorted(name.split(".")[0] for name in a_hi_12)

In [None]:
print(all_horizontal_image_names_12)

In [None]:
len(all_horizontal_image_names_12)

In [None]:
%%time
# run get_density_maps_horizontal on every horizontal image name of this block, keeping the results in order
catch_all_12 = [
    get_density_maps_horizontal(image_name, path_to_contents_blk_12)
    for image_name in all_horizontal_image_names_12
]

In [None]:
%%time
# split the per-image results into parallel lists, so that the real counts can be checked against the convolved counts
# positions inside every catch_all_12 entry: 0 name, 1 shape, 2 real count, 3 convolved count, 4 convolution
blk_12_img_names = [result[0] for result in catch_all_12]
blk_12_img_shapes = [result[1] for result in catch_all_12]
blk_12_real_counts = [result[2] for result in catch_all_12]
blk_12_convolved_counts = [result[3] for result in catch_all_12]
blk_12_convolutions = [result[4] for result in catch_all_12]


In [None]:
print(blk_12_img_shapes)

In [None]:
np.round(blk_12_convolved_counts, 0)

In [None]:
print(blk_12_real_counts)

In [None]:
# share of block-12 images whose rounded convolved count equals the real count (1.0 means every image matches)
np.equal(blk_12_real_counts, np.round(blk_12_convolved_counts)).mean()

In [None]:
print(blk_12_img_names)

In [None]:
save_subs_path_12 = 'all_preprocessed_data/Block_0206/sub_images_and_counts'

In [None]:
%%time
# create and save the 300 x 300 subwindows (stride == kernel size) for every block-12 image,
# keeping whatever create_subwindows_and_counts returns for each image
catch_everything_12 = []
for img_name, convolution in zip(blk_12_img_names, blk_12_convolutions):
    all_of_it_12 = create_subwindows_and_counts(
        path_to_contents_blk_12,
        img_name,
        convolution,
        save_subs_path_12,
        stride = 300,
        kernel_size = 300,
    )
    catch_everything_12.append(all_of_it_12)

In [None]:
# how many files in here - that divided by 2*20 would be the number of TS sequences that we will have
# NOTE(review): presumably 2 = files saved per subwindow and 20 = subwindows per sequence - confirm.
# os.listdir counts everything in the folder, so leftovers from an earlier run would inflate this number.
how_many = os.listdir(save_subs_path_12)
len(how_many)/(2*20)

In [None]:
%%time
# we will create and save dataframes with the subwindow names and the subcounts image-wise for now - later look into dataframe combinations
sub_count_dir_12 = 'all_preprocessed_data/Block_0206/sub_count_dfs'
# to_csv does not create missing folders, so make sure the target folder exists first
os.makedirs(sub_count_dir_12, exist_ok = True)
for counter, per_image_result in enumerate(catch_everything_12):
    # elements 0 and 2 of each result become the 'subwindow_name' and 'sub_counts' columns
    data_frame = pd.DataFrame(zip(per_image_result[0], per_image_result[2]), columns = ['subwindow_name', 'sub_counts'])
    # one CSV per image, numbered in the order the images were processed
    df_name = 'subcount_df_' + str(counter) + '.csv'
    df_path = os.path.join(sub_count_dir_12, df_name)
    # save the dataframe
    data_frame.to_csv(df_path, index = False)

In [None]:
len(catch_everything_12)

Block 0302

In [None]:
# folder holding the images and XML files for Block 0302 (stored under "Block_14" in the source data)
path_to_contents_blk_14 = "../../S_lab_TasselNet/Block_14_TN/Block_14_images_and_xml"

In [None]:
all_ims_14, all_xmls_14 = separate_images_and_xml(path_to_contents_blk_14)

In [None]:
# the function works, check the lengths, the images will be more than the xml files
len(all_ims_14), len(all_xmls_14)

In [None]:
# all_ims_14

In [None]:
# all_xmls_14

In [None]:
blk14_annot, blk14_not_annot = separate_annotated_and_not(all_ims_14, all_xmls_14)

In [None]:
# blk14_annot

In [None]:
# blk14_not_annot

In [None]:
# split the annotated Block 0302 images and their XML files into vertical and horizontal groups
# NOTE(review): the meaning of the third argument (7) is defined inside separate_annnot_vertical_horizontal - confirm it is right for this block
a_vi_14, a_vx_14, a_hi_14, a_hx_14 = separate_annnot_vertical_horizontal(blk14_annot, all_xmls_14, 7)

In [None]:
len(a_vi_14), len(a_vx_14), len(a_hi_14), len(a_hx_14)

In [None]:
# The function is working
check_matching(a_vi_14, a_vx_14), check_matching(a_hi_14, a_hx_14)

In [None]:
# horizontal image names (the text before the first "."), in sorted order
all_horizontal_image_names_14 = sorted(name.split(".")[0] for name in a_hi_14)

In [None]:
print(all_horizontal_image_names_14)

In [None]:
len(all_horizontal_image_names_14)

In [None]:
%%time
# run get_density_maps_horizontal on every horizontal image name of this block, keeping the results in order
catch_all_14 = [
    get_density_maps_horizontal(image_name, path_to_contents_blk_14)
    for image_name in all_horizontal_image_names_14
]

In [None]:
%%time
# split the per-image results into parallel lists, so that the real counts can be checked against the convolved counts
# positions inside every catch_all_14 entry: 0 name, 1 shape, 2 real count, 3 convolved count, 4 convolution
blk_14_img_names = [result[0] for result in catch_all_14]
blk_14_img_shapes = [result[1] for result in catch_all_14]
blk_14_real_counts = [result[2] for result in catch_all_14]
blk_14_convolved_counts = [result[3] for result in catch_all_14]
blk_14_convolutions = [result[4] for result in catch_all_14]


In [None]:
print(blk_14_img_shapes)

In [None]:
np.round(blk_14_convolved_counts, 0)

In [None]:
print(blk_14_real_counts)

In [None]:
# share of block-14 images whose rounded convolved count equals the real count (1.0 means every image matches)
np.equal(blk_14_real_counts, np.round(blk_14_convolved_counts)).mean()

In [None]:
print(blk_14_img_names)

In [None]:
save_subs_path_14 = 'all_preprocessed_data/Block_0302/sub_images_and_counts'

In [None]:
%%time
# create and save the 300 x 300 subwindows (stride == kernel size) for every block-14 image,
# keeping whatever create_subwindows_and_counts returns for each image
catch_everything_14 = []
for img_name, convolution in zip(blk_14_img_names, blk_14_convolutions):
    all_of_it_14 = create_subwindows_and_counts(
        path_to_contents_blk_14,
        img_name,
        convolution,
        save_subs_path_14,
        stride = 300,
        kernel_size = 300,
    )
    catch_everything_14.append(all_of_it_14)

In [None]:
# how many files in here - that divided by 2*20 would be the number of TS sequences that we will have
# NOTE(review): presumably 2 = files saved per subwindow and 20 = subwindows per sequence - confirm.
# os.listdir counts everything in the folder, so leftovers from an earlier run would inflate this number.
how_many = os.listdir(save_subs_path_14)
len(how_many)/(2*20)

In [None]:
%%time
# we will create and save dataframes with the subwindow names and the subcounts image-wise for now - later look into dataframe combinations
sub_count_dir_14 = 'all_preprocessed_data/Block_0302/sub_count_dfs'
# to_csv does not create missing folders, so make sure the target folder exists first
os.makedirs(sub_count_dir_14, exist_ok = True)
for counter, per_image_result in enumerate(catch_everything_14):
    # elements 0 and 2 of each result become the 'subwindow_name' and 'sub_counts' columns
    data_frame = pd.DataFrame(zip(per_image_result[0], per_image_result[2]), columns = ['subwindow_name', 'sub_counts'])
    # one CSV per image, numbered in the order the images were processed
    df_name = 'subcount_df_' + str(counter) + '.csv'
    df_path = os.path.join(sub_count_dir_14, df_name)
    # save the dataframe
    data_frame.to_csv(df_path, index = False)

In [None]:
len(catch_everything_14)