# Load model / weights, set files & batch size

In [1]:
import os
import absl.logging

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
absl.logging.set_verbosity(absl.logging.ERROR)

from random import shuffle
from random import sample
import tensorflow as tf
from tqdm import tqdm
from glob import glob
import pandas as pd
import numpy as np
import model


def train(test_or_data: str, files: list, restart: str, callbacks: list):
    """Train the network batch-by-batch over the given files.

    Files are consumed in consecutive groups of ``model.batch_size``; each
    group is loaded into memory, reshaped to one row per spectrum and passed
    to a single ``fit`` call. Trailing files that do not fill a whole group
    are ignored.

    Args:
        test_or_data: Name of the dataset folder whose parquet files hold the
            labelled spectra. It is compared against the fourth
            "/"-separated component of each file path.
        files: Paths of dataset parquet files and/or background text files.
        restart: User answer. "y" or "yes" (any case, surrounding whitespace
            ignored) starts from scratch and resets the metric history
            files; anything else resumes from the latest checkpoint found in
            ``./models``.
        callbacks: Keras callbacks forwarded to every ``fit`` call.

    Raises:
        FileNotFoundError: If resuming was requested but ``./models``
            contains no checkpoint.
    """
    nn = model.create_model(model.metrics)

    # The original test `restart == "y" or "Y"` was always truthy (a non-empty
    # string literal), so the resume branch could never run and the metric
    # histories were wiped on every call.
    if restart.strip().lower() in ("y", "yes"):
        np.save("models/mae.npy", [])
        np.save("models/precision.npy", [])
    else:
        checkpoint = tf.train.latest_checkpoint("./models")
        if checkpoint is None:
            raise FileNotFoundError(
                "No checkpoint found in ./models; cannot resume training."
            )
        nn.load_weights(checkpoint)

    rows_per_batch = model.batch_size * model.num_spectra

    for batch in tqdm(range(len(files) // model.batch_size)):
        batch_files = files[batch * model.batch_size : (batch + 1) * model.batch_size]
        # One entry per file; each entry holds that file's spectra / label rows.
        spectra = []
        labels = []
        for file in batch_files:
            # Relies on the "../1_data_generation/training_data/<folder>/..."
            # layout: index 3 is the folder directly below training_data.
            folder = file.split("/")[3]
            if folder == test_or_data:
                df = pd.read_parquet(file)
                spectra.append([a.tolist() for a in df.spectra.values])
                labels.append(list(df.labels.values))

            elif folder == "backgrounds":
                with open(file) as f:
                    contents = sample(f.readlines(), model.num_spectra)
                # Keep the same per-file nesting as the dataset branch and
                # give every sampled background spectrum its own all-zero
                # label row. Previously the spectra were appended one by one
                # against a single label row, so the length check below
                # silently dropped every batch containing a background file
                # (assuming model.num_spectra > 1).
                spectra.append(
                    [[float(i) for i in line.split(",")] for line in contents]
                )
                labels.append([[0] * 13 for _ in range(model.num_spectra)])

        # Defensive: never train on a batch whose inputs and labels disagree.
        if len(spectra) != len(labels):
            continue

        spectra = np.array(spectra).reshape(rows_per_batch, 4563)
        labels = np.array(labels).reshape(rows_per_batch, 13)

        nn.fit(spectra, labels, epochs=model.epochs, verbose=False, callbacks=callbacks)


class SaveMetricsCallback(tf.keras.callbacks.Callback):
    """Keras callback that appends per-epoch metrics to on-disk histories."""

    def on_epoch_end(self, epoch, logs=None):
        """Append this epoch's "mae" and "precision" values to their .npy files.

        Each history file is loaded, extended by one value taken from
        ``logs`` and written back, so both files must already exist.
        """
        targets = (
            ("models/mae.npy", "mae"),
            ("models/precision.npy", "precision"),
        )
        for history_path, metric_key in targets:
            extended = np.append(np.load(history_path), logs[metric_key])
            np.save(history_path, extended)


# Callback instance that appends the "mae" / "precision" value of every epoch
# to models/mae.npy and models/precision.npy.
metric_callback = SaveMetricsCallback()

# Keras checkpointing: writes weights only (not the full model) to
# model.checkpoint_folder, monitoring the "mae" metric and keeping only the
# best result seen so far (save_best_only=True).
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=model.checkpoint_folder,
    save_weights_only=True,
    monitor="mae",
    verbose=0,
    save_best_only=True,
)


# Name of the dataset folder below training_data/ to train on.
test_or_data = "test_set"

data_files = glob(f"../1_data_generation/training_data/{test_or_data}/*/*")
background_files = glob("../1_data_generation/training_data/backgrounds/*")

# Build the list of files to train on. The test set is used on its own; for
# any other dataset the background files are added twice so that more null
# samples are seen during training.
files = (
    data_files
    if test_or_data == "test_set"
    else data_files + 2 * background_files
)

# Randomise the file order in place before the files are grouped into batches.
shuffle(files)
print(f"Number of Files: {len(files)}")
print(f"Number of Batches: {len(files)//model.batch_size}")


Number of Files: 8972
Number of Batches: 179


# Train & Save

In [2]:
# Ask whether to start training from scratch or continue from a saved
# checkpoint, then run training with both callbacks attached.
restart = input("Do you want to restart? [y]es or [n]o:\t")
train(test_or_data, files, restart, [metric_callback, checkpoint_callback])

  3%|▎         | 6/179 [13:44<6:30:39, 135.49s/it]