In [1]:
# We only need the features extracted for the final 7 images (sub-images) in the sequence

In [2]:
import tensorflow as tf
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage
from skimage.transform import resize

2025-07-14 18:31:49.414620: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-14 18:31:49.450805: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-14 18:31:49.450828: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-14 18:31:49.451817: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-14 18:31:49.458006: I tensorflow/core/platform/cpu_feature_guar

In [3]:
# Load the trained Keras model from its HDF5 (.h5) file.
# The path is relative, so it resolves against the kernel's current working directory.

model = tf.keras.models.load_model("../../Spring_2024/Bayes_for_comps/TS_bayes_implementation_for_TN/models/trained_gmp_model_dense_32_new.h5")

2025-07-14 18:31:51.447803: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1929] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31134 MB memory:  -> device: 0, name: Tesla V100S-PCIE-32GB, pci bus id: 0000:86:00.0, compute capability: 7.0


In [4]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 conv2d (Conv2D)             (None, None, None, 32)    896       
                                                                 
 activation (Activation)     (None, None, None, 32)    0         
                                                                 
 conv2d_1 (Conv2D)           (None, None, None, 32)    9248      
                                                                 
 activation_1 (Activation)   (None, None, None, 32)    0         
                                                                 
 max_pooling2d (MaxPooling2  (None, None, None, 32)    0         
 D)                                                              
                                                           

In [5]:
# Define the feature extractor model: it reuses the trained model's input and
# stops at an intermediate layer instead of the final prediction layer.

# feature extractor input - the same input tensor as the trained model
feat_ext_input = model.input

# feature extractor output - taken at the 4th layer from the end. Per the original note this is the ReLU activation layer; it gives the same features as the dropout layer (presumably because dropout is a no-op at inference time), so either layer could be used.
# NOTE(review): the index -4 is positional and the tail of model.summary() is not visible here - confirm it still points at that activation layer if the architecture changes.
feat_ext_output = model.layers[-4].output

feature_extractor_model = tf.keras.models.Model(inputs = feat_ext_input, outputs = feat_ext_output)

In [6]:
feature_extractor_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 conv2d (Conv2D)             (None, None, None, 32)    896       
                                                                 
 activation (Activation)     (None, None, None, 32)    0         
                                                                 
 conv2d_1 (Conv2D)           (None, None, None, 32)    9248      
                                                                 
 activation_1 (Activation)   (None, None, None, 32)    0         
                                                                 
 max_pooling2d (MaxPooling2  (None, None, None, 32)    0         
 D)                                                              
                                                             

In [7]:
# Now for the data: we only need the features extracted for the last 7 images in the sequence, so let's get those here. Note that the density maps are not used for now, at least for the stage 1 implementation.

In [8]:
# The following functions will help create all the required matrices

In [9]:
def chose_xml_and_jpeg(file_location, num_images = 7):
    """Return the names of the last `num_images` JPEG files in a folder.

    The folder is expected to hold the image sequence of one block together
    with its XML annotation files. Only entries whose extension is ``.jpeg``
    or ``.jpg`` (case-insensitive) are treated as images, so XML files,
    sub-folders and any other stray entries are ignored instead of being
    mistaken for images.

    Parameters
    ----------
    file_location : str
        Path of the folder to list.
    num_images : int, optional
        How many files to keep from the end of the sorted list (default 7).

    Returns
    -------
    list of str
        File names (not full paths) in ascending sorted order. Fewer than
        `num_images` names are returned if the folder holds fewer JPEG files;
        an empty list is returned when `num_images` is not positive.
    """
    jpeg_extensions = ('.jpeg', '.jpg')

    # sort by name so that "the last N" is well defined for any listing order
    all_jpeg_files = sorted(
        file for file in os.listdir(file_location)
        if os.path.splitext(file)[1].lower() in jpeg_extensions
    )

    # guard: a slice starting at -0 would return the whole list, not an empty one
    if num_images <= 0:
        return []

    # get the final image files
    chosen_jpeg_files = all_jpeg_files[-num_images:]

    return chosen_jpeg_files

In [10]:
def create_sub_windows(folder_path, file, stride = 30, kernel_size = 30):
    """Cut one image into a grid of square sub-windows.

    The image at ``folder_path/file`` is read with ``plt.imread`` and scanned
    row by row (top to bottom, then left to right within a row), moving
    ``stride`` pixels at a time. At each position a window of up to
    ``kernel_size`` x ``kernel_size`` pixels is sliced out and passed through
    ``resize`` with output shape ``(kernel_size, kernel_size, 3)``. Windows
    that run past the bottom or right edge are sliced smaller and are
    therefore scaled up to the full size by ``resize``.

    Returns a list with one resized sub-window per grid position, in scan
    order. The grid positions themselves are not recorded.
    """
    image = plt.imread(os.path.join(folder_path, file))
    n_rows, n_cols = image.shape[0], image.shape[1]
    target_shape = (kernel_size, kernel_size, 3)

    # outer loop over rows, inner loop over columns: same order as a nested for
    return [
        resize(image[top: top + kernel_size, left: left + kernel_size, :], target_shape)
        for top in range(0, n_rows, stride)
        for left in range(0, n_cols, stride)
    ]

In [11]:
def extract_features(file_name, file_path):
    """Extract features for every sub-window of one image.

    The image ``file_name`` inside folder ``file_path`` is split into
    sub-windows by ``create_sub_windows`` (stride 30, kernel size 30), the
    windows are stacked into one batch along axis 0, the batch shape is
    printed, and the batch is passed to ``feature_extractor_model.predict``.

    Returns whatever ``predict`` returns for that batch.
    """
    # note the argument order: create_sub_windows expects (folder, file)
    window_batch = np.stack(create_sub_windows(file_path, file_name, 30, 30), axis = 0)

    # report the batch shape as a quick visual check
    print(window_batch.shape)

    return feature_extractor_model.predict(window_batch)

In [12]:
# Let's first work on the train data blocks

In [13]:
# block 0101

In [14]:


block_0101 = "../../Spring_2024/S_lab_TasselNet/Block_1_TN/Block_1_images_and_xml"

In [15]:
images_block_0101 = chose_xml_and_jpeg(block_0101)

In [16]:
images_block_0101

['Block0101_2020_08_26.jpeg',
 'Block0101_2020_08_27.jpeg',
 'Block0101_2020_08_28.jpeg',
 'Block0101_2020_08_31.jpeg',
 'Block0101_2020_09_02.jpeg',
 'Block0101_2020_09_07.jpeg',
 'Block0101_2020_09_16.jpeg']

In [17]:
%%time
# Run the feature extractor on each selected image of block 0101 and collect
# one feature array per image (extract_features prints each sub-window batch shape).

blk_0101_all_features = []
for file in images_block_0101:
    extracted_features = extract_features(file, block_0101)
    blk_0101_all_features.append(extracted_features)

(910, 30, 30, 3)


2025-07-14 18:31:52.376776: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8907


(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
CPU times: user 2.82 s, sys: 275 ms, total: 3.1 s
Wall time: 3.02 s


In [18]:
# Stack the per-image feature arrays along a new axis 1, so the images index the
# second dimension (the recorded shape below is (910, 7, 32)).
blk_0101_stacked_extracted_features = np.stack(blk_0101_all_features, axis = 1)

In [19]:
blk_0101_stacked_extracted_features.shape

(910, 7, 32)

In [20]:
# Sanity check: mean of the extracted features, to compare against the value obtained earlier.
# NOTE(review): the reference value is not recorded in this notebook - presumably from an earlier run.
np.mean(blk_0101_stacked_extracted_features)

0.36970586

In [21]:
# Don't save or stack this block on its own for now; first stack all the train blocks in order, then save them together.

In [22]:
# block 0102

In [23]:
block_0102 = "../../Spring_2024/S_lab_TasselNet/Block_2_TN/Block_2_images_and_xml"

In [24]:
images_block_0102 = chose_xml_and_jpeg(block_0102)

In [25]:
images_block_0102

['Block0102_2020_08_26.jpeg',
 'Block0102_2020_08_27.jpeg',
 'Block0102_2020_08_28.jpeg',
 'Block0102_2020_08_31.jpeg',
 'Block0102_2020_09_02.jpeg',
 'Block0102_2020_09_07.jpeg',
 'Block0102_2020_09_16.jpeg']

In [26]:
%%time
# Run the feature extractor on each selected image of block 0102 and collect
# one feature array per image (extract_features prints each sub-window batch shape).

blk_0102_all_features = []
for file in images_block_0102:
    extracted_features = extract_features(file, block_0102)
    blk_0102_all_features.append(extracted_features)

(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
CPU times: user 2.43 s, sys: 155 ms, total: 2.59 s
Wall time: 2.44 s


In [27]:
# Stack the per-image feature arrays along a new axis 1, so the images index the
# second dimension (the recorded shape below is (910, 7, 32)).
blk_0102_stacked_extracted_features = np.stack(blk_0102_all_features, axis = 1)

In [28]:
blk_0102_stacked_extracted_features.shape

(910, 7, 32)

In [29]:
# Sanity check: mean of the extracted features, to compare against the value obtained earlier.
# NOTE(review): the reference value is not recorded in this notebook - presumably from an earlier run.
np.mean(blk_0102_stacked_extracted_features)

0.3753665

In [30]:
# block 0203

In [31]:
block_0203 = "../../Spring_2024/S_lab_TasselNet/Block_9_TN/Block_9_images_and_xml"

In [32]:
images_block_0203 = chose_xml_and_jpeg(block_0203)

In [33]:
images_block_0203

['Block0203_2020_08_26.jpeg',
 'Block0203_2020_08_27.jpeg',
 'Block0203_2020_08_28.jpeg',
 'Block0203_2020_08_31.jpeg',
 'Block0203_2020_09_02.jpeg',
 'Block0203_2020_09_07.jpeg',
 'Block0203_2020_09_16.jpeg']

In [34]:
%%time
# Run the feature extractor on each selected image of block 0203 and collect
# one feature array per image (extract_features prints each sub-window batch shape).

blk_0203_all_features = []
for file in images_block_0203:
    extracted_features = extract_features(file, block_0203)
    blk_0203_all_features.append(extracted_features)

(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
CPU times: user 2.4 s, sys: 145 ms, total: 2.54 s
Wall time: 2.4 s


In [35]:
# Stack the per-image feature arrays along a new axis 1, so the images index the
# second dimension (the recorded shape below is (910, 7, 32)).
blk_0203_stacked_extracted_features = np.stack(blk_0203_all_features, axis = 1)

In [36]:
blk_0203_stacked_extracted_features.shape

(910, 7, 32)

In [37]:
# Sanity check: mean of the extracted features, to compare against the value obtained earlier.
# NOTE(review): the reference value is not recorded in this notebook - presumably from an earlier run.
np.mean(blk_0203_stacked_extracted_features)

0.33241686

In [38]:
# block 0301

In [39]:
block_0301 = "../../Spring_2024/S_lab_TasselNet/Block_13_TN/Block_13_images_and_xml"

In [40]:
images_block_0301 = chose_xml_and_jpeg(block_0301)

In [41]:
images_block_0301

['Block0301_2020_08_26.jpeg',
 'Block0301_2020_08_27.jpeg',
 'Block0301_2020_08_28.jpeg',
 'Block0301_2020_08_31.jpeg',
 'Block0301_2020_09_02.jpeg',
 'Block0301_2020_09_07.jpeg',
 'Block0301_2020_09_16.jpeg']

In [42]:
%%time
# Run the feature extractor on each selected image of block 0301 and collect
# one feature array per image (extract_features prints each sub-window batch shape).

blk_0301_all_features = []
for file in images_block_0301:
    extracted_features = extract_features(file, block_0301)
    blk_0301_all_features.append(extracted_features)

(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
CPU times: user 2.44 s, sys: 132 ms, total: 2.57 s
Wall time: 2.45 s


In [43]:
# Stack the per-image feature arrays along a new axis 1, so the images index the
# second dimension (the recorded shape below is (910, 7, 32)).
blk_0301_stacked_extracted_features = np.stack(blk_0301_all_features, axis = 1)

In [44]:
blk_0301_stacked_extracted_features.shape

(910, 7, 32)

In [45]:
# Sanity check: mean of the extracted features, to compare against the value obtained earlier.
# NOTE(review): the reference value is not recorded in this notebook - presumably from an earlier run.
np.mean(blk_0301_stacked_extracted_features)

0.36661348

In [46]:
# Collect the four training blocks in a fixed order (0101, 0102, 0203, 0301);
# the slice-based sanity check further down relies on this order.
# Note: despite the "targets" name, the entries are the stacked extracted-feature arrays
# (presumably used as targets downstream - see the save path).

all_train_targets_list = [blk_0101_stacked_extracted_features, blk_0102_stacked_extracted_features, blk_0203_stacked_extracted_features, blk_0301_stacked_extracted_features]

In [47]:
# Concatenate the blocks along axis 0 with np.vstack, so the rows of each block
# follow one another (the recorded shape below is (3640, 7, 32)).
all_train_targets = np.vstack(all_train_targets_list)

In [48]:
all_train_targets.shape

(3640, 7, 32)

In [49]:
# Sanity check: each consecutive slice of 910 rows must equal the block it came from.
# np.mean of the element-wise comparison is the fraction of matching entries, so 1.0 means an exact match.
# The hardcoded 910 is the number of sub-windows per image seen in the printed shapes above.
print(np.mean(all_train_targets[910*0:910*1, :, :] == blk_0101_stacked_extracted_features))
print(np.mean(all_train_targets[910*1:910*2, :, :] == blk_0102_stacked_extracted_features))
print(np.mean(all_train_targets[910*2:910*3, :, :] == blk_0203_stacked_extracted_features))
print(np.mean(all_train_targets[910*3:910*4, :, :] == blk_0301_stacked_extracted_features))

1.0
1.0
1.0
1.0


In [50]:
# save the stack
# np.save does not create missing parent folders, so make sure the output
# directory exists first (a no-op when it is already there).
os.makedirs("data/train_out_targets", exist_ok=True)
np.save("data/train_out_targets/all_train_targets.npy", all_train_targets)

In [51]:
# Sanity check: reload the saved file and compare it element-wise with the in-memory array
# (1.0 means every element matches, i.e. the save round-trips exactly).
np.mean(all_train_targets == np.load("data/train_out_targets/all_train_targets.npy"))

1.0

In [52]:
# Do this also for the validation data, and then stop for the day?

In [53]:
# block 0204

In [54]:
block_0204 = "../../Spring_2024/S_lab_TasselNet/Block_10_TN/Block_10_images_and_xml"

In [55]:
images_block_0204 = chose_xml_and_jpeg(block_0204)

In [56]:
images_block_0204

['Block0204_2020_08_26.jpeg',
 'Block0204_2020_08_27.jpeg',
 'Block0204_2020_08_28.jpeg',
 'Block0204_2020_08_31.jpeg',
 'Block0204_2020_09_02.jpeg',
 'Block0204_2020_09_07.jpeg',
 'Block0204_2020_09_16.jpeg']

In [57]:
%%time
# Run the feature extractor on each selected image of block 0204 (validation) and collect
# one feature array per image (extract_features prints each sub-window batch shape).

blk_0204_all_features = []
for file in images_block_0204:
    extracted_features = extract_features(file, block_0204)
    blk_0204_all_features.append(extracted_features)

(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
(910, 30, 30, 3)
CPU times: user 2.44 s, sys: 147 ms, total: 2.59 s
Wall time: 2.45 s


In [58]:
# Stack the per-image feature arrays along a new axis 1, so the images index the
# second dimension (the recorded shape below is (910, 7, 32)).
blk_0204_stacked_extracted_features = np.stack(blk_0204_all_features, axis = 1)

In [59]:
blk_0204_stacked_extracted_features.shape

(910, 7, 32)

In [60]:
# Sanity check: mean of the extracted features, to compare against the value obtained earlier.
# NOTE(review): the reference value is not recorded in this notebook - presumably from an earlier run.
np.mean(blk_0204_stacked_extracted_features)

0.36944777

In [61]:
# save the data (block 0204 is the only validation block, so it is saved as-is)
# np.save does not create missing parent folders, so make sure the output
# directory exists first (a no-op when it is already there).
os.makedirs("data/valid_out_targets", exist_ok=True)
np.save("data/valid_out_targets/all_valid_targets.npy", blk_0204_stacked_extracted_features)

In [62]:
# Sanity check: reload the saved file and compare it element-wise with the in-memory array
# (1.0 means every element matches, i.e. the save round-trips exactly).
np.mean(blk_0204_stacked_extracted_features == np.load("data/valid_out_targets/all_valid_targets.npy"))

1.0