[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ThomasAlbin/Astroniz-YT-Tutorials/blob/main/[ML1]-Asteroid-Spectra/TBD.ipynb)

# Step 8 TBD

In [1]:
# Import standard libraries
import os

# Import installed libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn

from sklearn import preprocessing
from sklearn import svm
from sklearn.model_selection import GridSearchCV

In [2]:
# Default: work locally, relative to the current working directory. If the notebook runs on Google
# Colab, mount the Google Drive and use the project folder stored there for files and models
core_path = ""
try:
    from google.colab import drive
    drive.mount('/gdrive')
    core_path = "/gdrive/MyDrive/Colab/asteroid_taxonomy/"
except ModuleNotFoundError:
    # google.colab is not installed, so the local default path is kept
    pass

In [3]:
# Load the level 2 asteroid data, stored as a pickle file below the core path
asteroids_file = os.path.join(core_path, "data/lvl2/", "asteroids.pkl")
asteroids_df = pd.read_pickle(asteroids_file)

In [4]:
# Collect the normalized reflectance values of all spectra in one array (one row per asteroid) ...
asteroids_X = np.array(
    [spectrum_df["Reflectance_norm550nm"].tolist() for spectrum_df in asteroids_df["SpectrumDF"]]
)

# ... and the corresponding main group labels in a second array
main_groups = asteroids_df["Main_Group"].to_list()
asteroids_y_org = np.array(main_groups)

In [5]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# One-hot encode the main group labels: each class becomes one column that is 1 for the asteroids
# of this class and 0 otherwise.
# The encoder is created with its defaults on purpose: the former "sparse" keyword was deprecated
# in scikit-learn 1.2 and removed in 1.4 (renamed to "sparse_output"), so passing it fails on
# current versions. The default output is a sparse matrix in all versions, hence the .toarray()
label_encoder = OneHotEncoder()

# The encoder expects a 2D input (samples x features), so the label vector is reshaped to a column
asteroids_y = label_encoder.fit_transform(asteroids_y_org.reshape(-1, 1)).toarray()

In [6]:
#asteroids_X = (asteroids_X - np.mean(asteroids_X, axis=1).reshape(-1,1)) / np.std(asteroids_X, axis=1).reshape(-1,1)

#asteroids_X = (asteroids_X-np.min(asteroids_X, axis=1).reshape(-1,1))/(np.max(asteroids_X, axis=1).reshape(-1,1)-np.min(asteroids_X, axis=1).reshape(-1,1))

In [7]:
#plt.plot(asteroids_X[0])

NameError: name 'plt' is not defined

In [8]:
# In this example we create a single, stratified test-training split with a ratio of 0.8 / 0.2
from sklearn.model_selection import StratifiedShuffleSplit

# A fixed seed makes the split, and everything that is derived from it, reproducible when the
# notebook is restarted and re-run
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)

# n_splits=1 yields exactly one (train, test) pair of index arrays, so no loop is needed
train_index, test_index = next(sss.split(asteroids_X, asteroids_y))

# Create a simple, single train / test split
X_train, X_test = asteroids_X[train_index], asteroids_X[test_index]
y_train, y_test = asteroids_y[train_index], asteroids_y[test_index]

# Convert the one-hot encoded rows back to integer class indices (position of the 1 in each row)
re_class_train = y_train.argmax(axis=1)
re_class_test = y_test.argmax(axis=1)

# Compute the class weightings: the inverse of the relative class frequency in the training data,
# so rarely occurring classes get a larger weight
n_train = len(re_class_train)
class_ids, class_counts = np.unique(re_class_train, return_counts=True)
weight_dict = {
    ast_type: float(1.0 / (ast_count / n_train))
    for ast_type, ast_count in zip(class_ids, class_counts)
}

In [9]:
# Show the class weights (key: integer class index, value: inverse relative class frequency in the
# training data)
weight_dict

{0: 3.3785488958990535, 1: 8.5, 2: 2.439635535307517, 3: 5.666666666666666}

In [10]:
# Show the one-hot encoded classes of the test data (one row per asteroid, one column per class)
y_test

array([[0., 0., 1., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.],
       ...,
       [0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 1., 0., 0.]])

In [12]:
import tensorflow.keras as keras

# Number of input nodes: one per reflectance value of a spectrum
n_inputs = asteroids_X.shape[1]

# The normalization layer is adapted on the training data only
normalizer = keras.layers.Normalization(axis=1)
normalizer.adapt(X_train)

# Stack the network: normalization, three Dense + ReLU stages that shrink from 100 to 10 nodes and
# a softmax output layer with one node per class
inputs = keras.Input(shape=(n_inputs,))
x = normalizer(inputs)
for n_units in (100, 50, 10):
    x = keras.layers.Dense(n_units)(x)
    x = keras.layers.ReLU()(x)
outputs = keras.layers.Dense(4, activation="softmax")(x)

model = keras.models.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.summary()

# Train for a fixed number of epochs; 25 % of the training data are held back for validation and
# the class weights are applied to the loss
end_epoch = 500
batch_size = 32
history = model.fit(
    X_train,
    y_train,
    epochs=end_epoch,
    batch_size=batch_size,
    verbose=0,
    validation_split=0.25,
    class_weight=weight_dict,
)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 49)]              0         
                                                                 
 dense_6 (Dense)             (None, 10)                500       
                                                                 
 re_lu_5 (ReLU)              (None, 10)                0         
                                                                 
 dense_7 (Dense)             (None, 4)                 44        
                                                                 
Total params: 544
Trainable params: 544
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Compare the training and the validation loss per epoch; a validation loss that rises while the
# training loss keeps falling indicates overfitting
fig, ax = plt.subplots()
ax.plot(history.history["loss"], label="Training loss")
ax.plot(history.history["val_loss"], label="Validation loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Categorical cross-entropy loss")
ax.legend()
plt.show()

In [None]:
# Same network as before, but now trained with early stopping

# Number of input nodes (one per reflectance value) and output nodes (one per one-hot column)
n_inputs = asteroids_X.shape[1]
n_classes = y_train.shape[1]

# The normalization layer is adapted on the training data only
normalizer = keras.layers.Normalization(axis=1)
normalizer.adapt(X_train)

# Stack the network: normalization, three Dense + ReLU stages and a softmax output layer
inputs = keras.Input(shape=(n_inputs,))
x = normalizer(inputs)
for n_units in (100, 50, 10):
    x = keras.layers.Dense(n_units)(x)
    x = keras.layers.ReLU()(x)
outputs = keras.layers.Dense(n_classes, activation="softmax")(x)

model = keras.models.Model(inputs=inputs, outputs=outputs)

model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

model.summary()

# Upper limit of epochs; the early stopping callback usually ends the training much earlier
end_epoch = 500
batch_size = 32

# Stop once the validation loss has not improved for 10 epochs. restore_best_weights=True rolls
# the model back to the epoch with the lowest validation loss; without it the model would keep the
# weights of the last epoch, i.e. from 10 epochs after the best one
es_callback = keras.callbacks.EarlyStopping(monitor='val_loss',
                                            patience=10,
                                            restore_best_weights=True)
history = model.fit(X_train, y_train,
                    epochs=end_epoch,
                    batch_size=batch_size,
                    verbose=2,
                    validation_split=0.25,
                    class_weight=weight_dict,
                    callbacks=[es_callback])

In [None]:
# Let the trained model predict the test data; the softmax output layer returns one value per
# class for each spectrum
y_test_prop_pred = model.predict(X_test)

In [None]:
# The predicted class is the index of the largest output value of each row
y_int_test_pred = y_test_prop_pred.argmax(axis=1)

In [None]:
# Convert the predictions to one-hot rows: pick, for every spectrum, the row of an identity matrix
# that belongs to the class with the largest output value
identity = np.eye(y_test_prop_pred.shape[1], dtype=y_test_prop_pred.dtype)
b = identity[y_test_prop_pred.argmax(axis=1)]
print(b)

In [None]:
# Translate the one-hot rows back to the original class names; the encoder returns one column, so
# the result is flattened to a 1D array (true classes first, predicted classes second)
asteroid_classes_test = label_encoder.inverse_transform(y_test).ravel()
asteroid_classes_test_pred = label_encoder.inverse_transform(b).ravel()

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# Fixed class order for the rows (true classes) and the columns (predicted classes)
class_labels = ["C", "S", "X", "Other"]
conf_mat = confusion_matrix(asteroid_classes_test,
                            asteroid_classes_test_pred,
                            labels=class_labels)

# Set the dark mode and the font size and style
plt.style.use('dark_background')
plt.rc('font', family='serif', size=12)

# Plot the confusion matrix with the class names as tick labels
disp = ConfusionMatrixDisplay(confusion_matrix=conf_mat, display_labels=class_labels)
disp.plot()
plt.show()

In [None]:
# A combined score: the F1 score, averaged over all classes and weighted by the number of test
# samples per class
weighted_f1 = sklearn.metrics.f1_score(re_class_test, y_int_test_pred, average="weighted")
f1_score = round(weighted_f1, 3)
print(f"F1 Score: {f1_score}")