## Zoobot Test
Since downloading the new code, I have been having issues running Zoobot.

I will use this notebook to get at least as far as training, and to get that running.

In [1]:
import os
import glob
import random
import shutil
import pandas as pd
from PIL import Image

import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, regularizers
import pandas as pd
from sklearn.model_selection import train_test_split

from zoobot.shared import schemas, label_metadata
from zoobot.tensorflow.data_utils import image_datasets
from zoobot.tensorflow.estimators import preprocess, define_model, alexnet_baseline, small_cnn_baseline
from zoobot.tensorflow.predictions import predict_on_tfrecords, predict_on_dataset
from zoobot.tensorflow.training import training_config
from zoobot.tensorflow.transfer_learning import utils
from zoobot.tensorflow.estimators import custom_layers

## Importing Manifest + Updating

In [2]:
# Root of the local Zoobot working directory; the manifest and thumbnail paths below are built from it.
folder = 'C:/Users/oryan/Documents/zoobot_new'

In [3]:
# Read the checked thumbnail manifest, using the first CSV column as the index,
# then turn that index back into an ordinary column named 'id'.
# NOTE(review): the frame displayed further down shows both `id` and `id.1`,
# which suggests the CSV already has an `id` column that this rename collides
# with — confirm, and pick a different name if so.
manifest_csv = f'{folder}/cutouts/hubble-training-300-300-rgb/hubble-thumb-manifest-checked.csv'
manifest = (
    pd.read_csv(manifest_csv, index_col=0)
    .reset_index()
    .rename(columns={'index': 'id'})
)

In [4]:
# Point every thumbnail at the local cutout folder: keep only the file name of
# the original `thumbnail_path` and prefix it with the local directory, then
# drop the original column.
thumbnail_dir = f'{folder}/cutouts/hubble-training-300-300-rgb'
manifest_local = (
    manifest
    .assign(
        thumbnail_local=lambda frame: frame['thumbnail_path'].apply(
            lambda remote_path: f'{thumbnail_dir}/{os.path.basename(remote_path)}'
        )
    )
    .drop(columns='thumbnail_path')
)

In [5]:
manifest_local

Unnamed: 0,id,id.1,RA,Dec,interacting,thumbnail_local
0,0,AHZ10004js,215.001397,52.955399,1,C:/Users/oryan/Documents/zoobot_new/cutouts/hu...
1,1,AHZ20000ak,150.387822,1.594000,1,C:/Users/oryan/Documents/zoobot_new/cutouts/hu...
2,2,AHZ20000f7,150.199560,1.602201,1,C:/Users/oryan/Documents/zoobot_new/cutouts/hu...
3,3,AHZ20000gx,150.178616,1.585255,1,C:/Users/oryan/Documents/zoobot_new/cutouts/hu...
4,4,AHZ20000hj,150.196404,1.625565,1,C:/Users/oryan/Documents/zoobot_new/cutouts/hu...
...,...,...,...,...,...,...
549,549,AHZ2000xli,150.410260,2.366218,0,C:/Users/oryan/Documents/zoobot_new/cutouts/hu...
550,550,AHZ2001l95,150.340542,2.819201,0,C:/Users/oryan/Documents/zoobot_new/cutouts/hu...
551,551,AHZ2000a8x,150.394694,1.800582,0,C:/Users/oryan/Documents/zoobot_new/cutouts/hu...
552,552,AHZ2000e76,149.587072,1.919982,0,C:/Users/oryan/Documents/zoobot_new/cutouts/hu...


In [6]:
# Settings shared by the dataset-loading, preprocessing and model-loading cells below.
requested_img_size = 300  # image side length handed to get_image_dataset, PreprocessingConfig and load_model
batch_size = 8  # the batches printed by get_image_dataset below hold 8 labels each
file_format = 'png'

In [7]:
# Plain Python lists of image paths and their binary `interacting` labels,
# in manifest row order.
paths = manifest_local['thumbnail_local'].tolist()
labels = manifest_local['interacting'].tolist()

In [8]:
# Swap every '_4' substring for '_3' in each path. str.replace rewrites all
# occurrences, so a match in a directory component would be changed as well.
paths_tmp = [image_path.replace('_4', '_3') for image_path in paths]

In [9]:
# Rebind `paths` to the rewritten copies; the train/validation split below works from these.
paths = paths_tmp

In [10]:
# Hold out 20% of the images for validation; the fixed seed keeps the split
# the same from run to run.
paths_train, paths_val, labels_train, labels_val = train_test_split(
    paths,
    labels,
    test_size=0.2,
    random_state=42,
)
# Sanity check: no image may appear on both sides of the split.
assert set(paths_train).isdisjoint(paths_val)

In [11]:
# Both splits are loaded with identical settings; only the paths and labels differ.
dataset_kwargs = dict(
    file_format=file_format,
    requested_img_size=requested_img_size,
    batch_size=batch_size,
)
raw_train_dataset = image_datasets.get_image_dataset(
    paths_train, labels=labels_train, **dataset_kwargs
)
raw_val_dataset = image_datasets.get_image_dataset(
    paths_val, labels=labels_val, **dataset_kwargs
)

[{'label': <tf.Tensor: shape=(8,), dtype=int32, numpy=array([0, 0, 1, 0, 0, 0, 0, 1])>}]
[{'label': <tf.Tensor: shape=(8,), dtype=int32, numpy=array([1, 1, 0, 0, 1, 0, 1, 1])>}]


In [12]:
len(paths)

554

In [13]:
# Preprocessing settings, applied to both the training and validation datasets below.
preprocess_config = preprocess.PreprocessingConfig(
    label_cols = ['label'],  # the raw batches printed above carry their labels under the 'label' key
    input_size = requested_img_size, 
    normalise_from_uint8=True,  # presumably rescales 0-255 pixel values — confirm against zoobot
    input_channels = 3,
    make_greyscale=True,  # base model is loaded with channels=1; presumably collapses the 3 input channels to 1 — confirm
    permute_channels=False
)

In [14]:
# Run the same preprocessing config over the raw training and validation datasets.
train_dataset, val_dataset = (
    preprocess.preprocess_dataset(raw_dataset, preprocess_config)
    for raw_dataset in (raw_train_dataset, raw_val_dataset)
)

In [15]:
# Checkpoint prefix of the pretrained greyscale model. Built from `folder` so
# the working-directory root is set in one place only; with the current
# `folder` value this resolves to the same path as before.
pretrained_checkpoint = f'{folder}/pretrained-models/replicated_train_only_greyscale_tf/replicated_train_only_greyscale_tf/checkpoint'

In [16]:
# Crop/resize geometry handed to define_model.load_model below.
crop_size = int(requested_img_size * 0.75)  # 225 when requested_img_size is 300
resize_size = 224  # the crop layer in base_model.summary() reports (None, 224, 224, 1) outputs

In [17]:
# Directory passed to TrainConfig for this run's logs. Built from `folder` so
# the working-directory root is set in one place only; with the current
# `folder` value this resolves to the same path as before.
log_dir = f'{folder}/model-logs/2022-04-26/'

In [37]:
# Load the pretrained Zoobot model from the checkpoint, to be used as the base for a new head.
base_model = define_model.load_model(
    pretrained_checkpoint,
    include_top = False,  # no classification head: the summary below ends at a (None, 7, 7, 1280) output
    input_size = requested_img_size,
    crop_size = crop_size,
    resize_size = resize_size,
    channels=1,  # single-channel input, matching the (None, 300, 300, 1) shapes in the summary
    output_dim = None,
    expect_partial=True  # presumably tolerates checkpoint values the headless model does not use — confirm
)



In [42]:
# Freeze the pretrained base so that only the new head is trained.
# This must run before model.compile(): Keras fixes the `trainable` state at
# compile time, so a model compiled earlier has to be compiled again.
# NOTE(review): the execution counts in this notebook are out of order, and one
# model.summary() output still lists the base weights as trainable — rerun
# top to bottom on a fresh kernel to be sure the freeze is in effect.
base_model.trainable = False

In [43]:
base_model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 perma_random_rotation_4 (Pe  (None, 300, 300, 1)      0         
 rmaRandomRotation)                                              
                                                                 
 perma_random_flip_4 (PermaR  (None, 300, 300, 1)      0         
 andomFlip)                                                      
                                                                 
 perma_random_crop_4 (PermaR  (None, 224, 224, 1)      0         
 andomCrop)                                                      
                                                                 
 sequential_11 (Sequential)  (None, 7, 7, 1280)        4048988   
                                                                 
Total params: 4,048,989
Trainable params: 0
Non-trainable params: 4,048,989
___________________________________________

In [36]:
base_model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 perma_random_rotation_3 (Pe  (None, 300, 300, 1)      0         
 rmaRandomRotation)                                              
                                                                 
 perma_random_flip_3 (PermaR  (None, 300, 300, 1)      0         
 andomFlip)                                                      
                                                                 
 perma_random_crop_3 (PermaR  (None, 224, 224, 1)      0         
 andomCrop)                                                      
                                                                 
 sequential_9 (Sequential)   (None, 7, 7, 1280)        4048988   
                                                                 
 global_average_pooling2d_2   (None, 1280)             0         
 (GlobalAveragePooling2D)                             

In [38]:
# New classification head for the (7, 7, 1280) feature maps produced by the base:
# global average pooling, two Dropout(0.75) + Dense(64, relu) blocks, then a
# final Dropout(0.75) and a single sigmoid unit for the binary label.
head_layers = [
    layers.InputLayer(input_shape=(7, 7, 1280)),
    layers.GlobalAveragePooling2D(),
]
for hidden_block in range(2):
    # Fresh layer objects on every pass; each block needs its own weights.
    head_layers += [layers.Dropout(0.75), layers.Dense(64, activation='relu')]
head_layers += [
    layers.Dropout(0.75),
    layers.Dense(1, activation='sigmoid', name='sigmoid_output'),
]
new_head = tf.keras.Sequential(head_layers)

In [26]:
new_head.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dropout (Dropout)           (None, 1280)              0         
                                                                 
 dense (Dense)               (None, 64)                81984     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                      

In [39]:
# Full model: an explicit single-channel input of the requested image size,
# followed by the pretrained base and then the new head.
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(requested_img_size,requested_img_size,1)),
    base_model,
    new_head
])

In [40]:
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_10 (Sequential)  (None, 7, 7, 1280)        4048989   
                                                                 
 sequential_12 (Sequential)  (None, 1)                 86209     
                                                                 
Total params: 4,135,198
Trainable params: 4,093,181
Non-trainable params: 42,017
_________________________________________________________________


In [22]:
# Training length and loss. Binary cross-entropy pairs with the single sigmoid output unit of the new head.
epochs = 15
loss = tf.keras.losses.binary_crossentropy

In [23]:
# Compile with the loss chosen above, Adam at a learning rate of 0.001 and
# accuracy as the tracked metric, then print the summary to check the split
# between trainable and non-trainable parameters.
model.compile(
    loss = loss,
    optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
    metrics = ['accuracy']
)
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 7, 7, 1280)        4048989   
                                                                 
 sequential_2 (Sequential)   (None, 1)                 86209     
                                                                 
Total params: 4,135,198
Trainable params: 86,209
Non-trainable params: 4,048,989
_________________________________________________________________


In [40]:
# Training run settings: where to log, how many epochs to run, and a patience
# of one sixth of the epoch count, rounded down (2 for 15 epochs).
train_config = training_config.TrainConfig(
    log_dir=log_dir,
    epochs=epochs,
    patience=epochs // 6,
)

In [42]:
# Start training through zoobot's train_estimator, using the compiled model,
# the run settings above, and the preprocessed training and validation datasets.
training_config.train_estimator(
    model,
    train_config,
    train_dataset,
    val_dataset,
    eager=True,  # presumably runs the model eagerly, which is easier to debug but slower — confirm
    verbose=1
)



Epoch 1/15

KeyboardInterrupt: 

In [109]:
300 * 300 * 3

270000