In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

2024-07-25 17:04:36.969608: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-25 17:04:37.360699: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-25 17:04:37.360756: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-25 17:04:37.449696: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-25 17:04:37.575126: I tensorflow/core/platform/cpu_feature_guar

In [2]:
# Load the trained Keras model from its HDF5 (.h5) file; the path is relative to the notebook's working directory.
gmp_model = tf.keras.models.load_model('../models/trained_gmp_model_dense_32_new.h5')

In [3]:
# Print the layer stack of the loaded model so the layer to tap for feature extraction can be identified.
gmp_model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 conv2d (Conv2D)             (None, None, None, 32)    896       
                                                                 
 activation (Activation)     (None, None, None, 32)    0         
                                                                 
 conv2d_1 (Conv2D)           (None, None, None, 32)    9248      
                                                                 
 activation_1 (Activation)   (None, None, None, 32)    0         
                                                                 
 max_pooling2d (MaxPooling2  (None, None, None, 32)    0         
 D)                                                              
                                                           

In [4]:
# This model does not have a Dense layer from which features can be extracted for a downstream Bayesian TS model, so another model was retrained: trained_gmp_model_dense_32.h5

In [5]:
# Feature-extractor input: reuse the loaded model's own input tensor.
feat_ext_input = gmp_model.input

In [6]:
# Feature-extractor output: tap the 4th-from-last layer, which the author identifies as the ReLU activation layer.
# Per the author, tapping the dropout layer instead would give the same extracted features.
# NOTE(review): the hard-coded index -4 depends on the exact layer order printed by gmp_model.summary() — re-check it if the model changes.
feat_ext_output = gmp_model.layers[-4].output

In [7]:
# Build the feature extractor: a Keras Model that takes gmp_model's input and returns the tapped layer's output.
feature_extractor_model = tf.keras.models.Model(inputs = feat_ext_input, outputs = feat_ext_output)

In [8]:
# Print the feature extractor's layer summary.
feature_extractor_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 conv2d (Conv2D)             (None, None, None, 32)    896       
                                                                 
 activation (Activation)     (None, None, None, 32)    0         
                                                                 
 conv2d_1 (Conv2D)           (None, None, None, 32)    9248      
                                                                 
 activation_1 (Activation)   (None, None, None, 32)    0         
                                                                 
 max_pooling2d (MaxPooling2  (None, None, None, 32)    0         
 D)                                                              
                                                             

In [9]:
# We might need to be cautious here though: dropout sets some neurons to 0, so features extracted at the dropout layer could be zeroed out. Workaround: extract the features from the Dense layer that comes before the prediction head (and the dropout layer). (Note: Keras applies dropout only in training mode, so it is inactive when calling predict().)

Locate the previously preprocessed data

In [None]:
# We currently have just a single block - 0103

In [10]:
# Relative path of the folder holding the preprocessed sub-images and their density maps for block 0103.
prev_preprocessed_image_loc = "preprocessed_data/Block_0103/sub_images_and_counts"

In [11]:
# List every file in the preprocessed folder in lexicographic order, so that later
# index-based pairing of image files and density-map files is deterministic.
folder_contents = sorted(os.listdir(prev_preprocessed_image_loc))

In [14]:
# Sanity check on the file count: each sub-window has 2 files (image + density map) and there are
# 42 sub-windows (ids 0-41), so the quotient is presumably the number of acquisition dates — TODO confirm.
len(folder_contents)/(2*42)

20.0

In [15]:
# Peek at the first file name (after sorting) to see the naming pattern.
folder_contents[0]

'Block0103_2020_08_03_0.npy'

In [16]:
# Load the first file as a NumPy array and display its shape.
try_fold = np.load(os.path.join(prev_preprocessed_image_loc, folder_contents[0]))
try_fold.shape

(300, 300, 3)

In [17]:
# Add a leading batch axis so the array matches the model's 4-D input (see the input shape in the summary).
# Reshaping from the last three axes (rather than indexing with None) keeps this cell idempotent:
# re-running it does not stack additional leading axes onto try_fold.
try_fold = try_fold.reshape((1,) + try_fold.shape[-3:])

In [18]:
# Smoke test: run the feature extractor on the single-image batch to see if the model gives predictions.
pred_feats_try = feature_extractor_model.predict(try_fold)



In [19]:
# Shape of the features extracted for the single test image.
pred_feats_try.shape

(1, 32)

In [20]:
# Display the raw extracted feature values.
pred_feats_try

array([[0.        , 0.        , 0.10059829, 0.        , 0.07901739,
        0.        , 0.        , 0.        , 0.03111666, 0.08963891,
        0.        , 0.        , 0.10756761, 0.        , 0.14078055,
        0.        , 0.03288436, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.09947655, 0.11643489, 0.        ,
        0.03223062, 0.        , 0.06246001, 0.07843385, 0.        ,
        0.        , 0.        ]], dtype=float32)

In [None]:
# TODO: write a function that extracts the features and also gets the corresponding density values for each sub-window. This may have to be carried out manually for each sub-window, but is there a way to automate it? Think about this.

In [25]:
# sub 10: pick out the image files and the density-map files that belong to sub-window '10'
character = '10'
image_suffix = '_' + character
density_suffix = image_suffix + '_density'
# suffix lengths, kept under their original names (3 and 11 for character '10')
digits_im = len(image_suffix)
digits_d = len(density_suffix)
# a file belongs to the sub-window when the part of its name before the first '.' ends with the suffix
sub_10_images = [name for name in folder_contents if name.split(".")[0].endswith(image_suffix)]
sub_10_dense_maps = [name for name in folder_contents if name.split(".")[0].endswith(density_suffix)]

In [28]:
# Number of sub-window-10 image files found.
len(sub_10_images)

20

In [30]:
# Number of sub-window-10 density-map files found.
len(sub_10_dense_maps)

20

In [31]:
# For every sub-window-10 image, collect the extracted feature array and the sum of its density map.
all_preds = []
all_densitites = []
for i, image_name in enumerate(sub_10_images):
    # read the sub-image from disk and give it a leading batch axis before predicting
    load_image = np.load(os.path.join(prev_preprocessed_image_loc, image_name))
    load_image_reshape = np.expand_dims(load_image, axis=0)
    extracted_features = feature_extractor_model.predict(load_image_reshape)
    all_preds.append(extracted_features)

    # the density map at the same list position is paired with this image; its sum is stored as the count
    load_count_map = np.load(os.path.join(prev_preprocessed_image_loc, sub_10_dense_maps[i]))
    count = load_count_map.sum()
    all_densitites.append(count)



In [32]:
# One feature array was appended per image, so this is the number of images processed.
len(all_preds)

20

In [35]:
def extract_features_and_get_counts(model, preprocessed_path, all_in_folder, digits_image, digits_dense, character, store_path):
    """Extract features for every image of one sub-window and pair them with tassel counts.

    Files are selected from ``all_in_folder`` by the part of their name before the
    first ``'.'``: images end with ``'_' + character`` and density maps end with
    ``'_' + character + '_density'``. Each image is loaded from ``preprocessed_path``,
    given a leading batch axis and passed to ``model.predict``; the matching density
    map is loaded and summed to give the count. The result is written to
    ``extracted_features_sub_window_<character>.csv`` inside ``store_path``.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(batch, verbose=0)`` that returns a 2-D array
        (one row per input image).
    preprocessed_path : str
        Directory containing the ``.npy`` image and density-map files.
    all_in_folder : iterable of str
        File names to select from (typically the listing of ``preprocessed_path``).
    digits_image, digits_dense : int
        Retained for backward compatibility only. Matching is now done with
        ``str.endswith``, so these suffix lengths are no longer needed and are ignored.
    character : str
        Sub-window identifier, e.g. ``'1'`` or ``'10'``.
    store_path : str
        Output directory for the CSV (with or without a trailing separator);
        it is created if it does not exist.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature_0 .. feature_{k-1}`` followed by ``tassel_count``,
        one row per image, in sorted file-name order.

    Raises
    ------
    ValueError
        If no image file matches ``character``, or if the selected image files
        and density-map files do not correspond one-to-one.
    """
    density_tag = '_density'
    image_suffix = '_' + character
    density_suffix = image_suffix + density_tag

    def stem(file_name):
        # part of the file name before the first '.'
        return file_name.split(".")[0]

    # endswith() selects exactly the files the fixed-width slice comparison selected when the
    # digit counts were correct, without requiring the caller to keep them in sync with `character`.
    necessary_sub_images = sorted(item for item in all_in_folder if stem(item).endswith(image_suffix))
    necessary_sub_dense_maps = sorted(item for item in all_in_folder if stem(item).endswith(density_suffix))

    if not necessary_sub_images:
        raise ValueError("no image files found for sub-window '" + character + "'")
    print(necessary_sub_images[0])

    # check if the image and density maps tally (same names once '_density' is stripped, same order)
    image_stems = [stem(item) for item in necessary_sub_images]
    density_stems = [stem(item)[:-len(density_tag)] for item in necessary_sub_dense_maps]
    files_tally = image_stems == density_stems
    print(float(files_tally))
    if not files_tally:
        # Pairing below is positional, so a mismatch would silently attach wrong counts to features.
        raise ValueError("image files and density maps do not tally for sub-window '" + character + "'")

    collect_extracted_features = []
    collect_corresponding_counts = []

    # collect the subwindow-wise features and corresponding counts
    for image_name, density_name in zip(necessary_sub_images, necessary_sub_dense_maps):
        # load the subimage and add a leading batch axis for the prediction
        load_image = np.load(os.path.join(preprocessed_path, image_name))
        extracted_features = model.predict(load_image[None, ...], verbose=0)
        collect_extracted_features.append(extracted_features)

        # load the count map; its sum is the count for this sub-window image
        load_count_map = np.load(os.path.join(preprocessed_path, density_name))
        collect_corresponding_counts.append(np.sum(load_count_map))

    # stack once and reuse for both the data and the column names
    feature_matrix = np.vstack(collect_extracted_features)
    Features_data = pd.DataFrame(feature_matrix, columns=['feature_' + str(i) for i in range(feature_matrix.shape[1])])
    # dataframe for counts
    counts_subwindow = pd.DataFrame(collect_corresponding_counts, columns=['tassel_count'])
    # combine the two dataframes
    combined_df = pd.concat((Features_data, counts_subwindow), axis=1)

    # save this dataframe; os.path.join makes the trailing separator on store_path optional
    if store_path:
        os.makedirs(store_path, exist_ok=True)
    save_path_name = os.path.join(store_path, 'extracted_features_sub_window_' + character + '.csv')
    combined_df.to_csv(save_path_name, index=False)

    return combined_df

In [36]:
# Output directory (relative path, with trailing slash) where the per-sub-window feature CSVs are written.
s_path = 'preprocessed_data/Block_0103/TS_ready_data_frames/'

In [37]:
# Try the function on sub-window '1'; 2 and 10 are passed as digits_image / digits_dense,
# i.e. the lengths of the suffixes '_1' and '_1_density'.
hello_1 = extract_features_and_get_counts(feature_extractor_model, prev_preprocessed_image_loc, folder_contents, 2, 10, '1', s_path)

Block0103_2020_08_03_1.npy
1.0


In [38]:
# Preview the first rows of the returned frame (feature columns followed by tassel_count).
hello_1.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,tassel_count
0,0.031226,0.093601,0.16666,0.005055,0.138135,0.0,0.0,0.0,0.10862,0.153721,...,0.186676,0.0,0.109686,0.0,0.142898,0.145159,0.0,0.0,0.0,6.119189
1,0.006039,0.075339,0.177254,0.027024,0.134824,0.0,0.0,0.0,0.092583,0.163522,...,0.197102,0.0,0.109875,0.0,0.174644,0.155566,0.0,0.0,0.0,6.49974
2,0.100629,0.149711,0.200657,0.078055,0.177551,0.0,0.0,0.0,0.156143,0.191151,...,0.234681,0.0,0.148043,0.0,0.176578,0.180028,0.0,0.0,0.04056,8.005755
3,0.124286,0.169779,0.243148,0.114587,0.223154,0.0,0.0,0.0,0.186565,0.234492,...,0.277311,0.0,0.192365,0.0,0.227407,0.213771,0.0,0.0,0.083839,9.803773
4,0.023037,0.094766,0.189089,0.040155,0.165265,0.0,0.0,0.0,0.126992,0.177692,...,0.226524,0.0,0.132325,0.0,0.191863,0.170676,0.0,0.0,0.0,6.999814


In [39]:
# Try the function for another character: sub-window '2', with the same suffix lengths (2 and 10).
hello_2 = extract_features_and_get_counts(feature_extractor_model, prev_preprocessed_image_loc, folder_contents, 2, 10, '2', s_path)

Block0103_2020_08_03_2.npy
1.0


In [40]:
# Preview the first rows of the sub-window '2' frame.
hello_2.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_23,feature_24,feature_25,feature_26,feature_27,feature_28,feature_29,feature_30,feature_31,tassel_count
0,0.0,0.0,0.078899,0.0,0.036574,0.016471,0.0,0.021107,0.0,0.051425,...,0.076175,0.014197,0.0,0.028712,0.026997,0.059702,0.012823,0.007846,0.0,5.008098
1,0.0,0.028107,0.142465,0.0,0.093712,0.0,0.0,0.0,0.050947,0.122917,...,0.163311,0.0,0.060314,0.0,0.129855,0.121143,0.0,0.0,0.0,4.656012
2,0.086968,0.141667,0.197355,0.048163,0.174495,0.0,0.0,0.0,0.1496,0.186943,...,0.22741,0.0,0.141835,0.0,0.169448,0.173595,0.0,0.0,0.002625,4.942209
3,0.116978,0.153644,0.217968,0.107848,0.200131,0.0,0.0,0.0,0.171069,0.214533,...,0.250111,0.0,0.168424,0.0,0.206105,0.19639,0.0,0.0,0.082019,4.994409
4,0.0,0.0,0.098989,0.0,0.061778,0.0,0.0,0.0,0.024364,0.083518,...,0.1395,0.0,0.008837,0.0,0.100934,0.088162,0.0,0.0,0.0,4.0


In [41]:
# Predictions for all sub-images of the block are produced in two passes: single-digit
# sub-window ids here, double-digit ids separately. This is the single-digit list, '0'..'9'.
character_list = [str(window_id) for window_id in range(10)]

In [42]:
%%time

# for single digit subwindows
# Run the extraction for ids '0'..'9'; 2 and 10 are the lengths of '_<d>' and '_<d>_density'.
# Each call also writes its CSV under s_path; the returned frames are kept in catch_all_dfs.
catch_all_dfs = []
for character in character_list:
    features_df = extract_features_and_get_counts(feature_extractor_model, prev_preprocessed_image_loc, folder_contents, 2, 10, character, s_path)
    catch_all_dfs.append(features_df)

Block0103_2020_08_03_0.npy
1.0
Block0103_2020_08_03_1.npy
1.0
Block0103_2020_08_03_2.npy
1.0
Block0103_2020_08_03_3.npy
1.0
Block0103_2020_08_03_4.npy
1.0
Block0103_2020_08_03_5.npy
1.0
Block0103_2020_08_03_6.npy
1.0
Block0103_2020_08_03_7.npy
1.0
Block0103_2020_08_03_8.npy
1.0
Block0103_2020_08_03_9.npy
1.0
CPU times: user 24.2 s, sys: 1.39 s, total: 25.6 s
Wall time: 16.6 s


In [43]:
# Double-digit sub-window ids as strings, '10'..'41', for the second prediction pass.
character_list_double = [str(window_id) for window_id in range(10, 42)]

In [46]:
# character_list_double

In [45]:
%%time

# for double digit subwindows
# Run the extraction for ids '10'..'41'; 3 and 11 are the lengths of '_<dd>' and '_<dd>_density'.
# NOTE(review): catch_all_dfs is re-initialised here, so the single-digit frames collected by the
# earlier loop are no longer reachable under this name (their CSVs were already written by the
# function) — confirm this is intended.
catch_all_dfs = []
for character in character_list_double:
    features_df = extract_features_and_get_counts(feature_extractor_model, prev_preprocessed_image_loc, folder_contents, 3, 11, character, s_path)
    catch_all_dfs.append(features_df)

Block0103_2020_08_03_10.npy
1.0
Block0103_2020_08_03_11.npy
1.0
Block0103_2020_08_03_12.npy
1.0
Block0103_2020_08_03_13.npy
1.0
Block0103_2020_08_03_14.npy
1.0
Block0103_2020_08_03_15.npy
1.0
Block0103_2020_08_03_16.npy
1.0
Block0103_2020_08_03_17.npy
1.0
Block0103_2020_08_03_18.npy
1.0
Block0103_2020_08_03_19.npy
1.0
Block0103_2020_08_03_20.npy
1.0
Block0103_2020_08_03_21.npy
1.0
Block0103_2020_08_03_22.npy
1.0
Block0103_2020_08_03_23.npy
1.0
Block0103_2020_08_03_24.npy
1.0
Block0103_2020_08_03_25.npy
1.0
Block0103_2020_08_03_26.npy
1.0
Block0103_2020_08_03_27.npy
1.0
Block0103_2020_08_03_28.npy
1.0
Block0103_2020_08_03_29.npy
1.0
Block0103_2020_08_03_30.npy
1.0
Block0103_2020_08_03_31.npy
1.0
Block0103_2020_08_03_32.npy
1.0
Block0103_2020_08_03_33.npy
1.0
Block0103_2020_08_03_34.npy
1.0
Block0103_2020_08_03_35.npy
1.0
Block0103_2020_08_03_36.npy
1.0
Block0103_2020_08_03_37.npy
1.0
Block0103_2020_08_03_38.npy
1.0
Block0103_2020_08_03_39.npy
1.0
Block0103_2020_08_03_40.npy
1.0
Block010