In [1]:
# import the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import random
import shutil
# import cv2
from scipy import ndimage
# from skimage.transform import resize
from joblib import Parallel, delayed
import tensorflow as tf
from sklearn.model_selection import KFold
# from tqdm.notebook import tqdm

2022-11-30 14:34:44.923937: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# List the GPU devices TensorFlow can see (an empty list means none were detected)
tf.config.list_physical_devices ('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

First, pre-process the data and store the results so that they can be reused later if needed.

In [3]:
# Read the saved sub-image array from disk (the matching counts are loaded in a separate cell)

# location of the sub-image data file
sub_images_path = r"/work/statsgeneral/gayara/Filtered_90/Data_files/all_sub_windows_float16.npy"
all_train_sub_images = np.load(sub_images_path)

In [4]:
# Sanity check: show the dimensions of the loaded sub-image array
all_train_sub_images.shape

(1347113, 32, 32, 3)

In [5]:
# Read the saved sub-image counts from disk
sub_image_counts_path = r"/work/statsgeneral/gayara/Filtered_90/Data_files/all_sub_windows_counts_float16.npy"
all_train_sub_images_counts = np.load(sub_image_counts_path)

In [6]:
# Sanity check: show the dimensions of the loaded counts array
all_train_sub_images_counts.shape

(1347113,)

In [7]:
# Draw the random index sets used to split the data.
# Seed Python's `random` module first so the draws below are repeatable.
random.seed(32)

n_sub_images = len(all_train_sub_images)

# 67356 distinct row indices reserved for validation
rand_digits_for_validation = random.sample(range(n_sub_images), 67356)

# every row index, in shuffled order; this draw also advances the `random`
# generator, so it influences every later draw made with the same module
rand_all_available = random.sample(range(n_sub_images), n_sub_images)


In [8]:
# Row indices available for training: every index that was not drawn for validation.
# NOTE: the result is a set, so any later loop over it follows the set's own
# iteration order, which is not guaranteed to be sorted index order.
train_indices_all = set(rand_all_available) - set(rand_digits_for_validation)

In [9]:
# Collect the validation images and their y labels, row by row, in the order
# the validation indices were drawn
X_valid_list = []
y_valid_list = []
for sample_idx in rand_digits_for_validation:
    X_valid_list.append(all_train_sub_images[sample_idx])
    y_valid_list.append(all_train_sub_images_counts[sample_idx])

In [10]:
# stack these to pass into the model
# np.stack joins the per-row entries along a new leading axis, giving one array per variable
X_valid = np.stack(X_valid_list)
y_valid = np.stack(y_valid_list)

In [11]:
# Everything that is not validation data becomes the training pool;
# the bootstrap samples are drawn from this pool only.
X_train_all_list = []
y_train_all_list = []
for sample_idx in train_indices_all:
    X_train_all_list.append(all_train_sub_images[sample_idx])
    y_train_all_list.append(all_train_sub_images_counts[sample_idx])

In [12]:
# Stack the per-row training entries into single arrays (new leading axis = row)
X_train_all = np.stack(X_train_all_list)
y_train_all = np.stack(y_train_all_list)

In [13]:
# Sanity check: show the dimensions of the training pool
X_train_all.shape

(1279757, 32, 32, 3)

In [14]:
def train_bootstrap(image_values, counts, sample_size=269423):
    """Draw one random training subset of images together with their counts.

    Row indices are drawn with ``random.sample`` (i.e. without replacement)
    from Python's module-level ``random`` generator, so the result depends on
    that generator's current state; seed it beforehand for repeatable draws.

    Parameters
    ----------
    image_values : array-like
        Images, indexed along the first axis.
    counts : array-like
        One count per image, aligned with ``image_values``.
    sample_size : int, optional
        Number of rows to draw. Defaults to 269423, the value that was
        previously hard-coded, so existing two-argument calls are unchanged.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train_X, train_y)``: the selected images and the selected counts,
        both in the order the indices were drawn.

    Raises
    ------
    ValueError
        If ``image_values`` and ``counts`` differ in length, or (raised by
        ``random.sample``) if ``sample_size`` is negative or larger than the
        number of available rows.
    """
    n_available = len(image_values)
    if len(counts) != n_available:
        # Mismatched inputs would silently pair images with the wrong counts.
        raise ValueError(
            "image_values and counts must have the same length "
            "(got {} and {})".format(n_available, len(counts))
        )

    train_index = random.sample(range(n_available), sample_size)
    # Explicit integer dtype keeps indexing valid even for an empty draw.
    index_array = np.asarray(train_index, dtype=np.intp)

    # Integer-array indexing copies the selected rows in one vectorised step,
    # giving the same arrays as stacking the rows one by one, without building
    # a Python list that holds one entry per row.
    train_X = np.asarray(image_values)[index_array]
    train_y = np.asarray(counts)[index_array]
    return (train_X, train_y)

In [15]:
# Draw five training subsets from the training pool and keep the images and
# the counts in two parallel lists (entry k of each list belongs together)
bootstrap_samples = [train_bootstrap(X_train_all, y_train_all) for _ in range(5)]
X_train_all_final = [sample[0] for sample in bootstrap_samples]
y_train_all_final = [sample[1] for sample in bootstrap_samples]

In [16]:
# Fine-tune one copy of the pre-trained network per bootstrap sample and save
# each result as model_bootstrap<counter>.h5.
#
# Per sample there are two training phases:
#   1. every layer of the loaded model is frozen, so only the newly added head is trained;
#   2. the loaded layers from 'conv2d_3' onwards are unfrozen and training continues.

# counter used to number the saved model files
counter = 96

pretrained_model_path = '/home/statsgeneral/gayara/Tasselnet/Filtered_90/tasselnet1_overlapping_w32.h5'
model_dir = "/home/statsgeneral/gayara/Tasselnet/Filtered_90/models"

# Create the output folder before any training starts, so that a missing
# directory cannot make the save fail after both training phases have run.
os.makedirs(model_dir, exist_ok=True)

for X_train, y_train in zip(X_train_all_final, y_train_all_final):
    # drop the graph/state left over from the previous iteration
    tf.keras.backend.clear_session()

    # load the pre-trained model on tassels
    model = tf.keras.models.load_model(pretrained_model_path)

    # Cut the loaded network at its 8th layer from the end; the layers after
    # that point are replaced by the new head built below.
    output_trial = model.layers[-8].output

    # new head: dropout -> flatten -> dense(512) -> relu -> dropout -> dense(1) -> relu
    added_dropout = tf.keras.layers.Dropout(0.5, name = "New_dropout")(output_trial)
    added_flatten = tf.keras.layers.Flatten(name = "Flatten2")(added_dropout)
    added_dense = tf.keras.layers.Dense(512, name = "New_Dense")(added_flatten)
    added_Act = tf.keras.layers.Activation('relu', name = "New_Activation")(added_dense)
    added_dropout2 = tf.keras.layers.Dropout(0.5, name = "New_dropout2")(added_Act)
    added_dense2 = tf.keras.layers.Dense(1, name = "New_Dense2")(added_dropout2)
    added_Act2 = tf.keras.layers.Activation('relu', name = "New_Activation2")(added_dense2)

    # define the new model with the functional API
    new_model = tf.keras.models.Model(model.input, added_Act2)

    # ---- phase 1: train the new head only ----
    # The added layers are not part of model.layers, so they stay trainable
    # while every layer of the loaded model is frozen.
    for layer in model.layers:
        layer.trainable = False

    # compile the model (must happen after the trainable flags are set)
    opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
    new_model.compile(loss='mean_squared_error', optimizer=opt, metrics = ['mean_absolute_error'])

    # stop when val_loss has not improved for 5 epochs and keep the best weights
    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', restore_best_weights = True, verbose=1, patience=5)

    # fit the frozen model
    history = new_model.fit(X_train, y_train,
                            epochs = 50, callbacks = [es],
                            validation_data = (X_valid, y_valid),
                            batch_size = 2000, validation_batch_size = 2000)

    # ---- phase 2: unfreeze the later loaded layers and retrain ----
    model.trainable = True

    # layers before 'conv2d_3' stay frozen; 'conv2d_3' and everything after it is trainable
    set_trainable = False
    for layer in model.layers:
        if layer.name == 'conv2d_3':
            set_trainable = True
        layer.trainable = set_trainable

    if not set_trainable:
        # Without this message a renamed layer would go unnoticed: all loaded
        # layers would stay frozen and phase 2 would only keep training the head.
        print("Warning: no layer named 'conv2d_3' found; no pre-trained layer was unfrozen")

    # compile the model again so the new trainable flags take effect
    opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
    new_model.compile(loss='mean_squared_error', optimizer=opt, metrics = ['mean_absolute_error'])

    # fresh early stopping for the second phase
    es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', restore_best_weights = True, verbose=1, patience=5)

    # fit the model (fine tuned)
    history_new = new_model.fit(X_train, y_train,
                                epochs = 50, callbacks = [es],
                                validation_data = (X_valid, y_valid),
                                batch_size = 2000, validation_batch_size = 2000)

    # save the model
    model_name = 'model_bootstrap' + str(counter) + '.h5'
    path = os.path.join(model_dir, model_name)
    new_model.save(path)
    counter = counter + 1
    

2022-11-30 14:35:08.469040: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-11-30 14:35:08.983400: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 30965 MB memory:  -> device: 0, name: Tesla V100-PCIE-32GB, pci bus id: 0000:5e:00.0, compute capability: 7.0


Epoch 1/50


2022-11-30 14:35:15.059073: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8401
2022-11-30 14:35:15.593472: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-11-30 14:35:15.594012: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-11-30 14:35:15.594026: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2022-11-30 14:35:15.594333: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-11-30 14:35:15.594381: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: early stopping
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/