In [1]:
# import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import shutil
import cv2
from scipy import ndimage
from skimage.transform import resize
from joblib import Parallel, delayed
import tensorflow as tf
from sklearn.model_selection import KFold
from tqdm.notebook import tqdm

In [2]:
# Location of the trained models
model_path = r"C:\Users\15739\Desktop\UNL - all\Fall 2022\Manuscript\Filtered_90\codes_and_models"

# File names of the five saved models: model_samples201.h5 ... model_samples205.h5
model_names = [f'model_samples{model_id}.h5' for model_id in range(201, 206)]

In [3]:
# Load every saved model from `model_path`. Joining the folder with the file name
# makes the load independent of the notebook's current working directory.
loaded_models = []
for model_name in model_names:
    loaded_models.append(tf.keras.models.load_model(os.path.join(model_path, model_name)))

In [4]:
# Folder holding the test images stored as numpy arrays
numpy_folder = r"C:\Users\15739\Desktop\UNL - all\Fall 2022\Manuscript\Filtered_90\np_test_filtered90"
# os.listdir returns names in arbitrary order and lists every entry in the folder,
# so keep only the .npy arrays and sort them to make the run reproducible.
test_files = sorted(name for name in os.listdir(numpy_folder) if name.endswith('.npy'))

In [6]:
len(test_files)

65

In [7]:
# Function to get the predictions given model and image

numpy_folder = r"C:\Users\15739\Desktop\UNL - all\Fall 2022\Manuscript\Filtered_90\np_test_filtered90"

def prediction_on_test_data(model, selected_file, stride = 8, kernel_size = 32, data_folder = None):
    """Predict the total count for one test image with a sliding-window model.

    The image is cut into windows of ``kernel_size`` x ``kernel_size`` pixels whose
    top-left corners are ``stride`` pixels apart; windows that reach past the bottom
    or right border are clipped to the image. Every window is resized to 32 x 32 x 3
    and passed to ``model.predict``. The value predicted for a window is spread
    evenly over the pixels of that (possibly clipped) window, overlapping
    contributions are averaged per pixel, and the sum of the averaged map is the
    prediction for the whole image.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(batch, batch_size=...)``. It is assumed to return
        one value per window (shape ``(n,)`` or ``(n, 1)``) -- TODO confirm for other models.
    selected_file : str
        Name of the ``.npy`` file to load; the array is indexed as [row, column, channel].
    stride : int
        Step in pixels between consecutive windows, in both directions.
    kernel_size : int
        Side length in pixels of each window before clipping.
    data_folder : str, optional
        Folder containing ``selected_file``. Defaults to the notebook-level
        ``numpy_folder``.

    Returns
    -------
    tuple
        ``(pred_on_test, normalized_counts, selected_file)``: the summed prediction,
        the per-pixel averaged map of shape (image height, image width), and the
        file name that was passed in.
    """
    if data_folder is None:
        data_folder = numpy_folder

    # os.path.join instead of a hard-coded "\\" keeps the path valid on any OS
    load_image = np.load(os.path.join(data_folder, selected_file))
    img_height, img_width = load_image.shape[:2]

    # Top-left corner of every window in row-major order; the same order is used
    # below to pair each prediction with the window it belongs to.
    window_origins = [(row, col)
                      for row in range(0, img_height, stride)
                      for col in range(0, img_width, stride)]

    # Border windows are smaller than kernel_size, so every window is resized
    # to the fixed 32 x 32 x 3 shape before stacking into one batch.
    all_test_sub_windows = np.stack([
        resize(load_image[row:row + kernel_size, col:col + kernel_size, :], (32, 32, 3))
        for row, col in window_origins
    ])

    # Prediction is run under the CPU device context
    with tf.device('/CPU:0'):
        test_image_prediction = model.predict(all_test_sub_windows, batch_size = 100)

    # Sum of per-pixel contributions, and how many windows covered each pixel
    Density_map = np.zeros((img_height, img_width))
    counts_map = np.zeros((img_height, img_width))

    for (row, col), window_prediction in zip(window_origins, test_image_prediction):
        # Actual size of this window after clipping at the image border
        window_height = min(kernel_size, img_height - row)
        window_width = min(kernel_size, img_width - col)
        # Spread the window's prediction evenly over its pixels (c_k / r^2)
        Density_map[row:row + kernel_size, col:col + kernel_size] += window_prediction / (window_height * window_width)
        counts_map[row:row + kernel_size, col:col + kernel_size] += 1

    # Average the overlapping contributions. Every pixel lies inside at least one
    # window (the one starting at the stride multiple at or before it), so
    # counts_map has no zeros.
    normalized_counts = np.divide(Density_map, counts_map)

    # Total predicted count for the image
    pred_on_test = np.sum(normalized_counts)

    return (pred_on_test, normalized_counts, selected_file)


#### Preds for model 1

In [8]:
model = loaded_models[0]

In [9]:
# Run model 1 on every test image; each result is the tuple
# (predicted count, normalized map, file name)
all_preds = [prediction_on_test_data(model, file) for file in tqdm(test_files)]

  0%|          | 0/65 [00:00<?, ?it/s]

In [10]:
len(all_preds)

65

In [11]:
# The true counts for the 65 test set images are derived from the annotation file.
# Load the annotated coordinates; header = None because the CSV is read without a
# header row, so the columns get integer labels until they are renamed.
coords_data = pd.read_csv(r"C:\Users\15739\Desktop\UNL - all\Fall 2022\Manuscript\Filtered_90\new_annotated_coordinates_all.csv", header = None)

In [12]:
coords_data.head()

Unnamed: 0,0,1,2,3,4
0,190814_djifc6310s_Aug14_SW_row_11_range_2_rep_...,42,45,61,40
1,190814_djifc6310s_Aug14_SW_row_11_range_2_rep_...,40,84,65,41
2,190814_djifc6310s_Aug14_SW_row_11_range_2_rep_...,34,119,76,36
3,190814_djifc6310s_Aug14_SW_row_11_range_2_rep_...,71,430,45,50
4,190814_djifc6310s_Aug14_SW_row_11_range_2_rep_...,59,614,35,46


In [13]:
# add column names
coords_data.columns = ['Image_name', 'top_x', 'top_y', 'width', 'height']

In [14]:
coords_data.head()

Unnamed: 0,Image_name,top_x,top_y,width,height
0,190814_djifc6310s_Aug14_SW_row_11_range_2_rep_...,42,45,61,40
1,190814_djifc6310s_Aug14_SW_row_11_range_2_rep_...,40,84,65,41
2,190814_djifc6310s_Aug14_SW_row_11_range_2_rep_...,34,119,76,36
3,190814_djifc6310s_Aug14_SW_row_11_range_2_rep_...,71,430,45,50
4,190814_djifc6310s_Aug14_SW_row_11_range_2_rep_...,59,614,35,46


In [15]:
len(coords_data['Image_name'].unique())

323

In [16]:
# Number of annotated coordinate rows per image -- these are the true counts.
# The count column is left unnamed here, so it is labelled 0.
true_counts_df = coords_data.groupby('Image_name').size().reset_index()

In [17]:
# see the dataset
true_counts_df.head()

Unnamed: 0,Image_name,0
0,190812_djifc6310s_Aug12_NE_row_16_range_4_rep_...,32
1,190812_djifc6310s_Aug12_NE_row_18_range_4_rep_...,25
2,190812_djifc6310s_Aug12_NE_row_33_range_2_rep_...,51
3,190812_djifc6310s_Aug12_NW_row_17_range_4_rep_...,36
4,190812_djifc6310s_Aug12_NW_row_22_range_2_rep_...,38


In [18]:
# Strip only the file extension; splitting on the first '.' would truncate
# any file name that itself contains a dot.
test_image_names = [os.path.splitext(file_name)[0] for file_name in test_files]

In [19]:
# The annotation file refers to images by their .png name
test_image_names_final = [f'{stem}.png' for stem in test_image_names]

In [20]:
len(test_image_names)

65

In [21]:
# Keep only the rows whose image belongs to the test folder
is_test_image = true_counts_df['Image_name'].isin(test_image_names_final)
test_true_counts = true_counts_df[is_test_image].reset_index(drop = True)

In [22]:
# how many images here?
test_true_counts.shape

(65, 2)

In [23]:
# rename the column 0 as true counts
test_true_counts.columns = ['Image_name', "True_count"]

In [24]:
test_true_counts.head()

Unnamed: 0,Image_name,True_count
0,190812_djifc6310s_Aug12_NW_row_32_range_6_rep_...,53
1,190812_djifc6310s_Aug12_SE_row_21_range_3_rep_...,21
2,190812_djifc6310s_Aug12_SE_row_32_range_4_rep_...,19
3,190812_djifc6310s_Aug12_SW_row_11_range_2_rep_...,33
4,190814_djifc6310s_Aug14_NE_row_33_range_2_rep_...,50


In [25]:
# save this dataframe for future use
test_true_counts.to_csv('true_counts_test_only_sample20.csv', index = False)

In [26]:
# Split every result tuple into its predicted count (index 0) and file name (index 2)
preds_list = [result[0] for result in all_preds]
files_list = [result[2] for result in all_preds]


In [27]:
# Swap the array extension for .png to match the names in the annotation file;
# splitext keeps file names that contain extra dots intact.
files_list_png = [os.path.splitext(file_name)[0] + '.png' for file_name in files_list]

In [28]:
# d = {'Image name': Image_name, 'Predicted value': pred_data}
preds_df  = list(zip(files_list_png, preds_list))

In [29]:
preds_df = pd.DataFrame(preds_df)

In [30]:
preds_df.columns = ['Image_name', 'Predicted_value']

In [31]:
preds_df.head()

Unnamed: 0,Image_name,Predicted_value
0,190812_djifc6310s_Aug12_NW_row_32_range_6_rep_...,33.046166
1,190812_djifc6310s_Aug12_SE_row_21_range_3_rep_...,23.899351
2,190812_djifc6310s_Aug12_SE_row_32_range_4_rep_...,24.782596
3,190812_djifc6310s_Aug12_SW_row_11_range_2_rep_...,34.859175
4,190814_djifc6310s_Aug14_NE_row_33_range_2_rep_...,39.026368


In [32]:
# merge the true and predicted counts
final_data = pd.merge(test_true_counts, preds_df, on = 'Image_name')

In [33]:
final_data.shape

(65, 3)

In [34]:
mae = np.mean(abs(final_data['True_count'] - final_data['Predicted_value'] ))

In [35]:
mae

8.565102537700852

In [36]:
final_data.to_csv('model1_preds_s20.csv', index = False)

In [37]:
# Collect the normalized map (index 1 of each result tuple) of every image so it can be saved
norm_counts = [result[1] for result in all_preds]

In [38]:
# Save each image's normalized map under the same file name as its input array.
# np.save returns None, so there is nothing to collect from it.
norm_counts_dir = r'C:\Users\15739\Desktop\UNL - all\Fall 2022\Manuscript\Filtered_90\codes_and_models\norm_counts_np_model1_s20'
# Create the output folder on first use instead of failing when it is missing
os.makedirs(norm_counts_dir, exist_ok = True)
for file_name, normalized_map in zip(files_list, norm_counts):
    np.save(os.path.join(norm_counts_dir, file_name), normalized_map)

#### Preds for model 2

In [41]:
model2 = loaded_models[1]

In [42]:
# Run model 2 on every test image; each result is the tuple
# (predicted count, normalized map, file name)
all_preds_model2 = [prediction_on_test_data(model2, file) for file in tqdm(test_files)]

  0%|          | 0/65 [00:00<?, ?it/s]

In [43]:
# Split every result tuple into its predicted count (index 0) and file name (index 2)
preds_list_md2 = [result[0] for result in all_preds_model2]
files_list_md2 = [result[2] for result in all_preds_model2]


In [44]:
# Swap the array extension for .png to match the names in the annotation file;
# splitext keeps file names that contain extra dots intact.
files_list_png = [os.path.splitext(file_name)[0] + '.png' for file_name in files_list_md2]

In [45]:
# d = {'Image name': Image_name, 'Predicted value': pred_data}
preds_df  = list(zip(files_list_png, preds_list_md2))

In [46]:
preds_df = pd.DataFrame(preds_df)

In [47]:
preds_df.columns = ['Image_name', 'Predicted_value']

In [48]:
preds_df.head()

Unnamed: 0,Image_name,Predicted_value
0,190812_djifc6310s_Aug12_NW_row_32_range_6_rep_...,35.309359
1,190812_djifc6310s_Aug12_SE_row_21_range_3_rep_...,25.63834
2,190812_djifc6310s_Aug12_SE_row_32_range_4_rep_...,26.636744
3,190812_djifc6310s_Aug12_SW_row_11_range_2_rep_...,37.199845
4,190814_djifc6310s_Aug14_NE_row_33_range_2_rep_...,41.268375


In [49]:
# Import the test true counts previously stored. The file name must match the one
# written by the earlier to_csv call in this notebook, otherwise a fresh run
# depends on a file the notebook never creates.
test_true = pd.read_csv("true_counts_test_only_sample20.csv")

In [50]:
test_true.head()

Unnamed: 0,Image_name,True_count
0,190812_djifc6310s_Aug12_NW_row_32_range_6_rep_...,53
1,190812_djifc6310s_Aug12_SE_row_21_range_3_rep_...,21
2,190812_djifc6310s_Aug12_SE_row_32_range_4_rep_...,19
3,190812_djifc6310s_Aug12_SW_row_11_range_2_rep_...,33
4,190814_djifc6310s_Aug14_NE_row_33_range_2_rep_...,50


In [51]:
# merge the true and predicted counts
final_data = pd.merge(test_true, preds_df, on = 'Image_name')

In [52]:
final_data.shape

(65, 3)

In [53]:
mae = np.mean(abs(final_data['True_count'] - final_data['Predicted_value'] ))

In [54]:
mae

9.173101982550504

In [55]:
final_data.to_csv('model2_preds_s20.csv', index = False)

In [56]:
# Collect the normalized map (index 1 of each result tuple) of every image so it can be saved
norm_counts = [result[1] for result in all_preds_model2]

In [57]:
# Save each image's normalized map under the same file name as its input array.
# np.save returns None, so there is nothing to collect from it.
norm_counts_dir = r'C:\Users\15739\Desktop\UNL - all\Fall 2022\Manuscript\Filtered_90\codes_and_models\norm_counts_np_model2_s20'
# Create the output folder on first use instead of failing when it is missing
os.makedirs(norm_counts_dir, exist_ok = True)
for file_name, normalized_map in zip(files_list_md2, norm_counts):
    np.save(os.path.join(norm_counts_dir, file_name), normalized_map)

#### Preds for model 3

In [58]:
model3 = loaded_models[2]

In [59]:
# Run model 3 on every test image; each result is the tuple
# (predicted count, normalized map, file name)
all_preds_model3 = [prediction_on_test_data(model3, file) for file in tqdm(test_files)]

  0%|          | 0/65 [00:00<?, ?it/s]

In [60]:
# Split every result tuple into its predicted count (index 0) and file name (index 2)
preds_list_md3 = [result[0] for result in all_preds_model3]
files_list_md3 = [result[2] for result in all_preds_model3]


In [61]:
# Swap the array extension for .png to match the names in the annotation file;
# splitext keeps file names that contain extra dots intact.
files_list_png = [os.path.splitext(file_name)[0] + '.png' for file_name in files_list_md3]

In [62]:
# d = {'Image name': Image_name, 'Predicted value': pred_data}
preds_df  = list(zip(files_list_png, preds_list_md3))

In [63]:
preds_df = pd.DataFrame(preds_df)

In [64]:
preds_df.columns = ['Image_name', 'Predicted_value']

In [65]:
preds_df.head()

Unnamed: 0,Image_name,Predicted_value
0,190812_djifc6310s_Aug12_NW_row_32_range_6_rep_...,33.68751
1,190812_djifc6310s_Aug12_SE_row_21_range_3_rep_...,24.163655
2,190812_djifc6310s_Aug12_SE_row_32_range_4_rep_...,25.249449
3,190812_djifc6310s_Aug12_SW_row_11_range_2_rep_...,35.47671
4,190814_djifc6310s_Aug14_NE_row_33_range_2_rep_...,39.272457


In [66]:
# Import the test true counts previously stored. The file name must match the one
# written by the earlier to_csv call in this notebook, otherwise a fresh run
# depends on a file the notebook never creates.
test_true = pd.read_csv("true_counts_test_only_sample20.csv")

In [67]:
test_true.head()

Unnamed: 0,Image_name,True_count
0,190812_djifc6310s_Aug12_NW_row_32_range_6_rep_...,53
1,190812_djifc6310s_Aug12_SE_row_21_range_3_rep_...,21
2,190812_djifc6310s_Aug12_SE_row_32_range_4_rep_...,19
3,190812_djifc6310s_Aug12_SW_row_11_range_2_rep_...,33
4,190814_djifc6310s_Aug14_NE_row_33_range_2_rep_...,50


In [68]:
# merge the true and predicted counts
final_data = pd.merge(test_true, preds_df, on = 'Image_name')

In [69]:
final_data.shape

(65, 3)

In [70]:
mae = np.mean(abs(final_data['True_count'] - final_data['Predicted_value'] ))

In [71]:
mae

8.70889736684686

In [72]:
final_data.to_csv('model3_preds_s20.csv', index = False)

In [73]:
# Collect the normalized map (index 1 of each result tuple) of every image so it can be saved
norm_counts3 = [result[1] for result in all_preds_model3]

In [74]:
# Save each image's normalized map under the same file name as its input array.
# np.save returns None, so there is nothing to collect from it.
norm_counts_dir = r'C:\Users\15739\Desktop\UNL - all\Fall 2022\Manuscript\Filtered_90\codes_and_models\norm_counts_np_model3_s20'
# Create the output folder on first use instead of failing when it is missing
os.makedirs(norm_counts_dir, exist_ok = True)
for file_name, normalized_map in zip(files_list_md3, norm_counts3):
    np.save(os.path.join(norm_counts_dir, file_name), normalized_map)

#### Preds for model 4

In [75]:
model4 = loaded_models[3]

In [76]:
# Run model 4 on every test image; each result is the tuple
# (predicted count, normalized map, file name)
all_preds_model4 = [prediction_on_test_data(model4, file) for file in tqdm(test_files)]

  0%|          | 0/65 [00:00<?, ?it/s]

In [77]:
# Split every result tuple into its predicted count (index 0) and file name (index 2)
preds_list_md4 = [result[0] for result in all_preds_model4]
files_list_md4 = [result[2] for result in all_preds_model4]


In [78]:
# Swap the array extension for .png to match the names in the annotation file;
# splitext keeps file names that contain extra dots intact.
files_list_png = [os.path.splitext(file_name)[0] + '.png' for file_name in files_list_md4]

In [79]:
# d = {'Image name': Image_name, 'Predicted value': pred_data}
preds_df  = list(zip(files_list_png, preds_list_md4))

In [80]:
preds_df = pd.DataFrame(preds_df)

In [81]:
preds_df.columns = ['Image_name', 'Predicted_value']

In [82]:
preds_df.head()

Unnamed: 0,Image_name,Predicted_value
0,190812_djifc6310s_Aug12_NW_row_32_range_6_rep_...,33.587952
1,190812_djifc6310s_Aug12_SE_row_21_range_3_rep_...,23.933685
2,190812_djifc6310s_Aug12_SE_row_32_range_4_rep_...,24.741406
3,190812_djifc6310s_Aug12_SW_row_11_range_2_rep_...,35.860124
4,190814_djifc6310s_Aug14_NE_row_33_range_2_rep_...,40.224085


In [83]:
# Import the test true counts previously stored. The file name must match the one
# written by the earlier to_csv call in this notebook, otherwise a fresh run
# depends on a file the notebook never creates.
test_true = pd.read_csv("true_counts_test_only_sample20.csv")

In [84]:
test_true.head()

Unnamed: 0,Image_name,True_count
0,190812_djifc6310s_Aug12_NW_row_32_range_6_rep_...,53
1,190812_djifc6310s_Aug12_SE_row_21_range_3_rep_...,21
2,190812_djifc6310s_Aug12_SE_row_32_range_4_rep_...,19
3,190812_djifc6310s_Aug12_SW_row_11_range_2_rep_...,33
4,190814_djifc6310s_Aug14_NE_row_33_range_2_rep_...,50


In [85]:
# merge the true and predicted counts
final_data = pd.merge(test_true, preds_df, on = 'Image_name')

In [86]:
final_data.shape

(65, 3)

In [87]:
mae = np.mean(abs(final_data['True_count'] - final_data['Predicted_value'] ))

In [88]:
mae

8.812369607453055

In [89]:
final_data.to_csv('model4_preds_s20.csv', index = False)

In [90]:
# Collect the normalized map (index 1 of each result tuple) of every image so it can be saved
norm_counts4 = [result[1] for result in all_preds_model4]

In [91]:
# Save each image's normalized map under the same file name as its input array.
# np.save returns None, so there is nothing to collect from it.
norm_counts_dir = r'C:\Users\15739\Desktop\UNL - all\Fall 2022\Manuscript\Filtered_90\codes_and_models\norm_counts_np_model4_s20'
# Create the output folder on first use instead of failing when it is missing
os.makedirs(norm_counts_dir, exist_ok = True)
for file_name, normalized_map in zip(files_list_md4, norm_counts4):
    np.save(os.path.join(norm_counts_dir, file_name), normalized_map)

#### Preds for model 5

In [92]:
model5 = loaded_models[4]

In [93]:
# Run model 5 on every test image; each result is the tuple
# (predicted count, normalized map, file name)
all_preds_model5 = [prediction_on_test_data(model5, file) for file in tqdm(test_files)]

  0%|          | 0/65 [00:00<?, ?it/s]

In [94]:
# Split every result tuple into its predicted count (index 0) and file name (index 2)
preds_list_md5 = [result[0] for result in all_preds_model5]
files_list_md5 = [result[2] for result in all_preds_model5]


In [95]:
# Swap the array extension for .png to match the names in the annotation file;
# splitext keeps file names that contain extra dots intact.
files_list_png = [os.path.splitext(file_name)[0] + '.png' for file_name in files_list_md5]

In [96]:
# d = {'Image name': Image_name, 'Predicted value': pred_data}
preds_df  = list(zip(files_list_png, preds_list_md5))

In [97]:
preds_df = pd.DataFrame(preds_df)

In [98]:
preds_df.columns = ['Image_name', 'Predicted_value']

In [99]:
preds_df.head()

Unnamed: 0,Image_name,Predicted_value
0,190812_djifc6310s_Aug12_NW_row_32_range_6_rep_...,32.650041
1,190812_djifc6310s_Aug12_SE_row_21_range_3_rep_...,23.933525
2,190812_djifc6310s_Aug12_SE_row_32_range_4_rep_...,25.06481
3,190812_djifc6310s_Aug12_SW_row_11_range_2_rep_...,34.429972
4,190814_djifc6310s_Aug14_NE_row_33_range_2_rep_...,38.769783


In [100]:
# Import the test true counts previously stored. The file name must match the one
# written by the earlier to_csv call in this notebook, otherwise a fresh run
# depends on a file the notebook never creates.
test_true = pd.read_csv("true_counts_test_only_sample20.csv")

In [101]:
test_true.head()

Unnamed: 0,Image_name,True_count
0,190812_djifc6310s_Aug12_NW_row_32_range_6_rep_...,53
1,190812_djifc6310s_Aug12_SE_row_21_range_3_rep_...,21
2,190812_djifc6310s_Aug12_SE_row_32_range_4_rep_...,19
3,190812_djifc6310s_Aug12_SW_row_11_range_2_rep_...,33
4,190814_djifc6310s_Aug14_NE_row_33_range_2_rep_...,50


In [102]:
# merge the true and predicted counts
final_data = pd.merge(test_true, preds_df, on = 'Image_name')

In [103]:
final_data.shape

(65, 3)

In [104]:
mae = np.mean(abs(final_data['True_count'] - final_data['Predicted_value'] ))

In [105]:
mae

8.5217717110005

In [106]:
final_data.to_csv('model5_preds_s20.csv', index = False)

In [107]:
# Collect the normalized map (index 1 of each result tuple) of every image so it can be saved
norm_counts5 = [result[1] for result in all_preds_model5]

In [108]:
# Save each image's normalized map under the same file name as its input array.
# np.save returns None, so there is nothing to collect from it.
norm_counts_dir = r'C:\Users\15739\Desktop\UNL - all\Fall 2022\Manuscript\Filtered_90\codes_and_models\norm_counts_np_model5_s20'
# Create the output folder on first use instead of failing when it is missing
os.makedirs(norm_counts_dir, exist_ok = True)
for file_name, normalized_map in zip(files_list_md5, norm_counts5):
    np.save(os.path.join(norm_counts_dir, file_name), normalized_map)