In [None]:
## This is the answer guide notebook 
## It has the un-modified data and good hyperparams

In [None]:
! pip install deepbench


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from deepbench.astro_object import StarObject, GalaxyObject 

from tensorflow.keras.layers import Input, Dropout, Conv1D, Dense, AvgPool1D, Flatten
from tensorflow.keras.models import Model

import tensorflow as tf 

import math 
from sklearn.metrics import roc_curve, confusion_matrix


In [None]:
# don't worry too much about how this simulation is being run. It's fine. Don't worry. I promise :3c 

# This generates data as per the Secret Parameters 
# I request you don't change anything here or look too closely. 
# Otherwise! this challenge won't be as fun

class SkyGenerator(tf.keras.utils.Sequence):
    """Batch generator that simulates star and galaxy images on the fly.

    Labels (0 = star, 1 = galaxy) are drawn once at construction and stay
    fixed. Images are simulated from the instance RNG every time a batch is
    requested, so asking for the same batch index twice yields different
    images for the same labels.

    Generators created with the same ``n_samples``, ``shuffle`` and ``seed``
    start from the same RNG state and therefore produce the same labels; pass
    distinct ``seed`` values when independent splits are wanted.

    Args:
        n_samples: Total number of samples served per pass over the generator.
        batch_size: Number of samples per batch; the last batch may be smaller.
        pre_processing: Optional object whose ``transform(image)`` method is
            applied to every generated image.
        train: Stored as ``self.train``; it does not currently influence the
            generated data.
        shuffle: If true, the label order is shuffled once at construction.
        seed: Seed for the NumPy random generator. Defaults to 42, the value
            that was previously hard-coded.
    """

    def __init__(self, n_samples, batch_size=64, pre_processing=None, train=True, shuffle=False, seed=42):
        # Keras 3 backs Sequence with PyDataset, which warns when the base
        # initializer is skipped; on older tf.keras this call is a no-op.
        super().__init__()

        self.n_samples = n_samples

        self.pre_processing = pre_processing
        self.train = train

        self.batch_size = batch_size
        self.shuffle = shuffle

        self.image_size = 28
        self.noise_level = 0.05

        self.rng = np.random.default_rng(seed=seed) # Seed for the main notebook

        self.labels = self.decide_labels()

    def decide_labels(self):
        """Draw the class split and return the label array (0 = star, 1 = galaxy).

        The number of stars is drawn uniformly from
        ``[int(.45 * n_samples), int(.65 * n_samples))``; the remaining samples
        are galaxies. Labels are shuffled only when ``self.shuffle`` is set.
        """
        n_stars = self.rng.integers(low=int(.45*self.n_samples), high=int(.65*self.n_samples))
        n_galaxies = self.n_samples - n_stars
        # Kept as a Python list while shuffling so the RNG is consumed exactly
        # as before and the default seed reproduces the same labels.
        labels = [0] * n_stars + [1] * n_galaxies
        if self.shuffle:
            self.rng.shuffle(labels)
        return np.asarray(labels)

    def generate_image(self, label):
        """Simulate one image for ``label`` (0 builds a star, anything else a galaxy).

        The radius is drawn from ``[1, image_size // 2)`` and both center
        coordinates from ``[1, image_size)``. When a pre-processing object was
        supplied, its ``transform`` is applied before the image is returned.
        """
        # Integer upper bound; same value as image_size / 2 for the sizes used here.
        radius = self.rng.integers(low=1, high=self.image_size // 2)
        center_x = self.rng.integers(low=1, high=self.image_size)
        center_y = self.rng.integers(low=1, high=self.image_size)

        # NOTE(review): StarObject is given `noise=` while GalaxyObject is given
        # `noise_level=`; both are kept exactly as written -- confirm against the
        # installed deepbench signatures.
        if label == 0:
            image = StarObject(
                image_dimensions=self.image_size,
                noise=self.noise_level,
                radius=radius
                    ).create_object(
                        center_x=center_x, center_y=center_y
                        )

        else:
            image = GalaxyObject(
                image_dimensions=self.image_size,
                noise_level=self.noise_level,
                radius=radius
                    ).create_object(
                        center_x=center_x, center_y=center_y
                        )

        if self.pre_processing is not None:
            image = self.pre_processing.transform(image)

        return image

    def __len__(self):
        """Return the number of batches per pass, counting a final partial batch."""
        return math.ceil(self.n_samples / self.batch_size)

    def __getitem__(self, idx):
        """Return ``(batch_x, batch_y)`` for batch number ``idx``.

        ``batch_y`` is the matching slice of ``self.labels``; ``batch_x`` has
        shape ``(len(batch_y), image_size, image_size)`` and is filled with
        freshly simulated images, one per label.
        """
        low = idx * self.batch_size
        high = min(low + self.batch_size, len(self.labels))
        batch_y = self.labels[low:high]
        batch_x = np.zeros((len(batch_y), self.image_size, self.image_size))
        for index, label in enumerate(batch_y):
            batch_x[index] = self.generate_image(label)

        return batch_x, batch_y

In [None]:
def make_model(input_shape=(28, 28)):
    """Build the (uncompiled) star/galaxy binary classifier.

    Architecture: three 1-D convolutions (4, 8 and 12 filters with kernel
    sizes 2, 4 and 6), average pooling with window 6, a 20-unit ReLU dense
    layer, dropout of 0.3 and a single sigmoid output unit.

    Args:
        input_shape: Shape of one input sample without the batch axis.
            Defaults to ``(28, 28)``.

    Returns:
        A Keras ``Model`` mapping the input to one sigmoid output.
    """
    in_layer = Input(input_shape)

    # Only one convolution stack is built. A second identical stack that also
    # starts from `in_layer` would be disconnected from the output and ignored
    # by Model(in_layer, output), so it is omitted.
    x = Conv1D(filters=4, kernel_size=2)(in_layer)
    x = Conv1D(filters=8, kernel_size=4)(x)
    x = Conv1D(filters=12, kernel_size=6)(x)

    x = AvgPool1D(6)(x)

    x = Flatten()(x)
    x = Dense(20, activation='relu')(x)

    x = Dropout(0.3)(x)
    output = Dense(1, activation='sigmoid')(x)
    model = Model(in_layer, output)

    return model

In [None]:
# Build the network, attach the loss and optimizer, then print the layer table.
model = make_model()
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
    loss='binary_crossentropy',
)
model.summary()

In [None]:
# Simulated data streams: one for fitting, one for per-epoch validation.
train_generator = SkyGenerator(1280, shuffle=True)
val_generator = SkyGenerator(1280, shuffle=True, train=False)

# Train for 80 epochs and keep only the dict of per-epoch metrics.
history = model.fit(
    x=train_generator,
    epochs=80,
    verbose=1,
    validation_data=val_generator,
).history


In [None]:
# Plot history
def plot_history(history):
    """Plot the per-epoch training loss and, when present, the validation loss.

    Args:
        history: Dict of metric name to per-epoch values, such as the
            ``.history`` attribute of the object returned by ``model.fit``.
            Must contain ``'loss'``; ``'val_loss'`` is optional.
    """
    loss = history['loss']
    epochs = range(len(loss))

    fig, ax = plt.subplots()
    ax.plot(epochs, loss, label="Train")

    # 'val_loss' is absent when the model was fit without validation data;
    # skip that curve instead of failing with a KeyError.
    val_loss = history.get('val_loss')
    if val_loss is not None:
        ax.plot(epochs, val_loss, label='Validation')

    ax.set_title("Loss History")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.legend()
    plt.show()

plot_history(history)

In [None]:
# Generator used for the final evaluation (so people can see the in-domain vs
# out-of-domain behaviour).
# NOTE(review): this is constructed with exactly the same arguments as
# val_generator, and SkyGenerator seeds its RNG with a fixed value, so confirm
# that this really yields the intended "different" testing set.
test_generator = SkyGenerator(1280, shuffle=True, train=False)

def make_prediction(test_generator):
    """Run the notebook-level ``model`` on a generator and threshold its outputs.

    Args:
        test_generator: Data generator passed to ``model.predict``; it must
            expose a ``labels`` attribute.

    Returns:
        Tuple ``(prediction_classes, labels)``: the model outputs mapped to 0
        where they are below 0.5 and to 1 otherwise, and the generator's labels.
    """
    scores = model.predict(test_generator)
    # Challenge - make this backwards
    hard_classes = np.where(scores < 0.5, 0, 1)
    return hard_classes, test_generator.labels

def test_quality(prediction, labels):
    """Return the binary accuracy of ``prediction`` against ``labels``.

    Args:
        prediction: Predicted classes or probabilities; values are thresholded
            at 0.5 by the metric.
        labels: True 0/1 labels, one per prediction.

    Returns:
        The accuracy as a NumPy scalar.
    """
    # Flatten both inputs so an (N, 1) prediction array and an (N,) label array
    # are compared element by element.
    y_true = np.asarray(labels, dtype="float32").ravel()
    y_pred = np.asarray(prediction, dtype="float32").ravel()

    # Keras metrics are called as metric(y_true, y_pred); only y_pred is
    # thresholded, so the labels must come first.
    accuracy = tf.keras.metrics.BinaryAccuracy()(y_true, y_pred)
    return accuracy.numpy()

In [None]:
# Classify the test set and keep the predicted classes next to the true labels.
prediction, labels = make_prediction(test_generator)

# Fraction of test samples classified correctly (shown as the cell output).
test_quality(prediction, labels)

In [None]:
# Metrics and evaluation
def plot_test_results(predictions, labels):
    """Show a ROC curve and an annotated confusion matrix, one figure each.

    Args:
        predictions: Array of predicted values; used as the scores for the
            ROC curve and, flattened, as the predicted classes for the
            confusion matrix.
        labels: Array of true labels.

    NOTE(review): the call below passes thresholded 0/1 classes, so the ROC
    curve is built from two distinct score values only -- confirm whether raw
    probabilities were intended.
    """
    score_fpr, score_tpr, _ = roc_curve(labels, predictions)
    confusion = confusion_matrix(labels.ravel(), predictions.ravel())

    # Figure 1: ROC curve.
    _, roc_ax = plt.subplots()
    roc_ax.plot(score_fpr, score_tpr)
    roc_ax.set_xlabel("FPR")
    roc_ax.set_ylabel("TPR")
    roc_ax.set_title("ROC AUC Curve")
    plt.show()

    # Figure 2: confusion matrix drawn as an image, with the count written in
    # the middle of every cell (rows are true classes, columns predicted ones).
    _, cm_ax = plt.subplots()
    cm_ax.imshow(confusion)
    for true, row in enumerate(confusion):
        for predicted, count in enumerate(row):
            cm_ax.text(predicted, true, count, ha="center", va="center")

    cm_ax.set_xlabel("Predicted")
    cm_ax.set_ylabel("True")
    cm_ax.set_title("Confusion Matrix")
    plt.show()

plot_test_results(prediction, labels)