## Zoobot Test
Since downloading the new code, I have been having issues running Zoobot.

I will use this notebook to get at least as far as training, and to get that running.

In [1]:
import os
import glob
import random
import shutil
import pandas as pd
from PIL import Image

import tensorflow as tf
import numpy as np
from tensorflow.keras import layers, regularizers
import pandas as pd
from sklearn.model_selection import train_test_split

from zoobot.shared import schemas, label_metadata
from zoobot.tensorflow.data_utils import image_datasets
from zoobot.tensorflow.estimators import preprocess, define_model, alexnet_baseline, small_cnn_baseline
from zoobot.tensorflow.predictions import predict_on_tfrecords, predict_on_dataset
from zoobot.tensorflow.training import training_config
from zoobot.tensorflow.transfer_learning import utils
from zoobot.tensorflow.estimators import custom_layers

## Importing Manifest + Updating

In [2]:
# Project root; the manifest CSV below is read from a sub-directory of it.
folder = "C:/Users/oryan/Documents/esac-project"

In [3]:
# Read the checked thumbnail manifest. The first CSV column is loaded as the
# index and then turned back into an ordinary column named `id`.
manifest = (
    pd.read_csv(
        f'{folder}/cutouts/hubble-training-100-all/hubble-thumb-manifest-checked.csv',
        index_col=0,
    )
    .reset_index()
    .rename(columns={'index': 'id'})
)

In [4]:
# Re-root every thumbnail onto the local copy of the cutouts: keep only the
# file name from the manifest's original `thumbnail_path` and place it in the
# same cutouts directory (under `folder`) that the manifest CSV was read from.
# Building the path from `folder` instead of repeating the absolute path means
# this cell follows any change to `folder` rather than silently pointing at
# the old location.
manifest_local = (
    manifest
    .assign(
        thumbnail_local=lambda df: df['thumbnail_path'].apply(
            lambda x: f'{folder}/cutouts/hubble-training-100-all/{os.path.basename(x)}'
        )
    )
    # The original path column is replaced by `thumbnail_local`.
    .drop(columns='thumbnail_path')
)

In [5]:
# Show the re-rooted manifest as the cell output to check the new
# `thumbnail_local` column.
manifest_local

Unnamed: 0,id,RA,Dec,interacting,thumbnail_local
0,AHZ10004js,215.001397,52.955399,1,C:/Users/oryan/Documents/esac-project/cutouts/...
1,AHZ20000ak,150.387822,1.594000,1,C:/Users/oryan/Documents/esac-project/cutouts/...
2,AHZ20000f7,150.199560,1.602201,1,C:/Users/oryan/Documents/esac-project/cutouts/...
3,AHZ20000gx,150.178616,1.585255,1,C:/Users/oryan/Documents/esac-project/cutouts/...
4,AHZ20000hj,150.196404,1.625565,1,C:/Users/oryan/Documents/esac-project/cutouts/...
...,...,...,...,...,...
548,AHZ2000xli,150.410260,2.366218,0,C:/Users/oryan/Documents/esac-project/cutouts/...
549,AHZ2001l95,150.340542,2.819201,0,C:/Users/oryan/Documents/esac-project/cutouts/...
550,AHZ2000a8x,150.394694,1.800582,0,C:/Users/oryan/Documents/esac-project/cutouts/...
551,AHZ2000e76,149.587072,1.919982,0,C:/Users/oryan/Documents/esac-project/cutouts/...


In [6]:
# Settings shared by the training and validation image pipelines.
requested_img_size = 300  # image size requested from the loader; reused for the model input
batch_size = 3            # number of images per batch
file_format = "png"       # passed to the image loader as `file_format`

In [7]:
# Parallel lists: paths[i] is an image file and labels[i] is its
# `interacting` value from the manifest.
paths = list(manifest_local.thumbnail_local)
labels = list(manifest_local.interacting)

In [8]:
# Hold out 20% of the images for validation; the fixed random_state keeps
# the split the same on every run.
paths_train, paths_val, labels_train, labels_val = train_test_split(
    paths,
    labels,
    test_size=0.2,
    random_state=42,
)
# Sanity check: no image may appear in both the training and validation sets.
assert set(paths_train).isdisjoint(paths_val)

In [9]:
# Build the raw (not yet preprocessed) image datasets. Both splits use the
# same loader settings and differ only in their paths and labels; the training
# dataset is created first, then the validation dataset.
raw_train_dataset, raw_val_dataset = (
    image_datasets.get_image_dataset(
        split_paths,
        file_format=file_format,
        requested_img_size=requested_img_size,
        batch_size=batch_size,
        labels=split_labels,
    )
    for split_paths, split_labels in (
        (paths_train, labels_train),
        (paths_val, labels_val),
    )
)

[{'label': <tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 0, 0])>}]
[{'label': <tf.Tensor: shape=(3,), dtype=int32, numpy=array([0, 1, 0])>}]


In [10]:
# Preprocessing settings applied to both the training and validation datasets.
preprocess_config = preprocess.PreprocessingConfig(
    label_cols=['label'],  # the raw batches printed above carry their labels under 'label'
    input_size=requested_img_size,
    # NOTE(review): one input channel with no greyscale conversion presumably
    # means the thumbnails are already single-channel -- confirm.
    input_channels=1,
    make_greyscale=False,
    permute_channels=False,
    normalise_from_uint8=True,
)

In [11]:
# Apply the same preprocessing configuration to both splits
# (training first, then validation).
train_dataset, val_dataset = (
    preprocess.preprocess_dataset(raw_dataset, preprocess_config)
    for raw_dataset in (raw_train_dataset, raw_val_dataset)
)

In [12]:
# Checkpoint of the pretrained greyscale model that the base model is loaded from.
pretrained_checkpoint = (
    'C:/Users/oryan/Documents/zoobot_new/pretrained-models/'
    'replicated_train_only_greyscale_tf/replicated_train_only_greyscale_tf/checkpoint'
)

In [13]:
# Crop to three quarters of the requested image size (225 for the 300 set
# earlier); both values are handed to the model loader below.
crop_size = int(0.75 * requested_img_size)
resize_size = 224

In [14]:
# Directory handed to the training configuration as `log_dir`.
log_dir = "C:/Users/oryan/Documents/zoobot_new/model-logs/2022-04-25/"

In [15]:
# Build the base model from the pretrained checkpoint with include_top=False;
# a new classification head is stacked on top of it further down.
base_model = define_model.load_model(
    pretrained_checkpoint,
    expect_partial=True,
    include_top=False,
    output_dim=None,
    input_size=requested_img_size,
    crop_size=crop_size,
    resize_size=resize_size,
)



In [16]:
# Freeze the pretrained base so only the new head is trained: the model
# summary below reports the base's 4,048,989 parameters as non-trainable.
base_model.trainable = False

In [17]:
# New binary-classification head. Its input shape (7, 7, 1280) matches the
# base model's output shape shown in the model summary below. The head holds
# the 86,209 trainable parameters reported there.
new_head = tf.keras.Sequential(
    [
        layers.InputLayer(input_shape=(7, 7, 1280)),
        layers.GlobalAveragePooling2D(),
        # Two dropout -> dense(64, relu) stages.
        layers.Dropout(0.75),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.75),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.75),
        # Single sigmoid unit, paired with the binary cross-entropy loss.
        layers.Dense(1, activation='sigmoid', name='sigmoid_output'),
    ]
)

In [18]:
# Full model: single-channel image input -> frozen base model -> new head.
model = tf.keras.Sequential(
    [
        tf.keras.Input(shape=(requested_img_size, requested_img_size, 1)),
        base_model,
        new_head,
    ]
)

In [19]:
# Training hyperparameters.
epochs = 15  # number of epochs passed to the training configuration
loss = tf.keras.losses.binary_crossentropy  # matches the single sigmoid output of the head

In [20]:
# Compile with Adam and track accuracy, then print the layer/parameter summary.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=loss,
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 7, 7, 1280)        4048989   
                                                                 
 sequential_2 (Sequential)   (None, 1)                 86209     
                                                                 
Total params: 4,135,198
Trainable params: 86,209
Non-trainable params: 4,048,989
_________________________________________________________________


In [21]:
# Training configuration; patience is a sixth of the epoch count, rounded
# down (2 for the 15 epochs set above).
train_config = training_config.TrainConfig(
    log_dir=log_dir,
    epochs=epochs,
    patience=epochs // 6,
)

In [None]:
# Train the compiled model on the preprocessed training dataset, with the
# preprocessed validation dataset passed alongside it.
training_config.train_estimator(model, train_config, train_dataset, val_dataset)

Epoch 1/15

 Ending step:  0.0
148/148 - 23s - loss: 1.3837 - accuracy: 0.4955 - val_loss: 0.7024 - val_accuracy: 0.4955 - 23s/epoch - 155ms/step
Epoch 2/15

 Ending step:  3.0
148/148 - 20s - loss: 0.7455 - accuracy: 0.5158 - val_loss: 0.6959 - val_accuracy: 0.5045 - 20s/epoch - 133ms/step
Epoch 3/15

 Ending step:  6.0
148/148 - 20s - loss: 1.2422 - accuracy: 0.5226 - val_loss: 0.6951 - val_accuracy: 0.4955 - 20s/epoch - 138ms/step
Epoch 4/15

 Ending step:  9.0
148/148 - 22s - loss: 0.9143 - accuracy: 0.5701 - val_loss: 0.6952 - val_accuracy: 0.4955 - 22s/epoch - 149ms/step
Epoch 5/15

 Ending step:  12.0
148/148 - 22s - loss: 1.8037 - accuracy: 0.5475 - val_loss: 0.6928 - val_accuracy: 0.5045 - 22s/epoch - 150ms/step
Epoch 6/15

 Ending step:  15.0
148/148 - 25s - loss: 0.6785 - accuracy: 0.6018 - val_loss: 0.6928 - val_accuracy: 0.5090 - 25s/epoch - 168ms/step
Epoch 7/15
