# Homework 2:  Improve Baseline CNN model and compute metrics for assessing the performance of the CNN-based model

## Notebook Outline

    1. Train Basic Model (From Homework 1)

    2. Saving and Loading Model
    
    3. Metrics Assess Performance
    
    4. Hyper-parameter Tuning
    
    5. Overfitting Prevention
    
    6. Compare Performance of Basic and Improved Model

## 1. Train Basic Model (From Homework 1)

In [None]:
# Load the tensorflow, which is a framework for deep learning.
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
# Load numpy library as "np", which can handle large matrices and provides some mathematical functions.
import numpy as np 
# Load pandas as "pd", which is useful when working with data tables. 
import pandas as pd 
# Load random, which provide some randomize functions.
import random
# Load a function pyplot as "plt" to plot figures.
import matplotlib.pyplot as plt
# Load functions to calculate precision, and recall
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Fix one seed and hand it to Python's `random`, NumPy and TensorFlow so that
# repeated runs draw the same random numbers.
seed = 1234
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(seed)

In [None]:
# Root directory of the pmuBAGE data
pmuBAGE_data_dir = "pmuBAGE/data"

# How many .npy files each event type is split into
voltage_tensor_number = 31
frequency_tensor_number = 21

# Read every voltage file (voltage_0.npy, voltage_1.npy, ...) and stack the
# pieces along the first (event) axis.
voltage_tensor_list = [
    np.load(f"{pmuBAGE_data_dir}/voltage/voltage_{idx}.npy")
    for idx in range(voltage_tensor_number)
]
voltage_tensor = np.concatenate(voltage_tensor_list, axis=0)

# Same procedure for the frequency files.
frequency_tensor_list = [
    np.load(f"{pmuBAGE_data_dir}/frequency/frequency_{idx}.npy")
    for idx in range(frequency_tensor_number)
]
frequency_tensor = np.concatenate(frequency_tensor_list, axis=0)

# Reverse the order of the last three axes so that each tensor is laid out as
# (event_idx, timestamp, PMU_idx, measurements).
voltage_tensor = voltage_tensor.transpose(0, 3, 2, 1)
frequency_tensor = frequency_tensor.transpose(0, 3, 2, 1)

# Show the resulting shapes (voltage first, then frequency).
print(voltage_tensor.shape)
print(frequency_tensor.shape)

In [None]:
"""
    Use standardization to pre-process the pmu time series data.
    Input  -> two original tensors: voltage_tensor, frequency_tensor
    Output -> two standardized tensors: voltage_tensor_standardized, frequency_tensor_standardized
    Requirement Details: 
        The tensor shape is (number_of_event, timestamps (600), pmus (100), measurements (4))
        For each time sequence (Single pmu measurement sequence, 600 timestamps), standardize them by Z-Score
        z-score = (x - mean) / std
"""

# Statistics are taken along axis 1 (the timestamp axis). keepdims=True keeps
# that axis with length 1 so the results broadcast against the full tensor
# without a separate expand_dims step.

# Voltage tensor

voltage_mean = np.mean(voltage_tensor, axis=1, keepdims=True)
voltage_std = np.std(voltage_tensor, axis=1, keepdims=True)
# A series with zero standard deviation yields 0/0 = NaN, which nan_to_num
# maps to 0. The errstate context only silences the RuntimeWarning that this
# expected division would otherwise emit; the numeric result is unchanged.
with np.errstate(divide="ignore", invalid="ignore"):
    voltage_tensor_standardized = np.nan_to_num((voltage_tensor - voltage_mean) / voltage_std)

# Frequency tensor

frequency_mean = np.mean(frequency_tensor, axis=1, keepdims=True)
frequency_std = np.std(frequency_tensor, axis=1, keepdims=True)
with np.errstate(divide="ignore", invalid="ignore"):
    frequency_tensor_standardized = np.nan_to_num((frequency_tensor - frequency_mean) / frequency_std)

# Standardization must not change the tensor shapes.
print(voltage_tensor_standardized.shape)
print(frequency_tensor_standardized.shape)


In [None]:
# Number of the classes
num_classes = 2

# Event counts per class, read off the first (event) axis of each tensor
n_voltage = len(voltage_tensor_standardized)
n_frequency = len(frequency_tensor_standardized)

# Integer class labels: every voltage event is class 0, every frequency event is class 1
voltage_label = np.zeros(n_voltage, dtype=int)
frequency_label = np.ones(n_frequency, dtype=int)

"""
    Implement the one-hot encoding on the voltage and frequency event labels.
    Input  -> Original voltage and frequency labels (voltage_label, frequency_label)
    Output -> One-hot encoded voltage and frequency labels (voltage_label_onehot, frequency_label_onthot)
    Voltage label: "0" -> "[1, 0]"
    Frequency label: "1" -> "[0, 1]"
    You can use any library or tool for doing this
"""

# NOTE: the name `frequency_label_onthot` is kept as-is because later cells refer to it.
to_onehot = tf.keras.utils.to_categorical
voltage_label_onehot = to_onehot(voltage_label, num_classes=num_classes)
frequency_label_onthot = to_onehot(frequency_label, num_classes=num_classes)

# First encoded label of each class: should be [1, 0], then [0, 1]
print(voltage_label_onehot[0])
print(frequency_label_onthot[0])
# Encoded label shapes: should be (620, 2), then (84, 2)
print(voltage_label_onehot.shape)
print(frequency_label_onthot.shape)

In [None]:
# Shuffle the events inside each class. The voltage permutation is drawn
# before the frequency one so the random stream is consumed in a fixed order.
# Labels need no shuffling: all events of one class share the same label.
voltage_order = np.random.permutation(n_voltage)
frequency_order = np.random.permutation(n_frequency)
voltage_tensor_standarded_permuted = voltage_tensor_standardized[voltage_order]
frequency_tensor_standarded_permuted = frequency_tensor_standardized[frequency_order]

In [None]:
# Fraction of each class that goes into the training set
train_portion = 0.7

# Samples
X_voltage = voltage_tensor_standarded_permuted
X_frequency = frequency_tensor_standarded_permuted
# Labels
y_voltage = voltage_label_onehot
y_frequency = frequency_label_onthot

"""
    Separate the samples and labels into train and test datasets.
    The first 70% of the voltage and of the frequency samples/labels form the training dataset,
    the remaining 30% of each form the testing dataset.
    Input  -> X_voltage, X_frequency, y_voltage, y_frequency
    Output -> X_train, y_train, X_test, y_test
    In every output the voltage part comes first, followed by the frequency part.
"""

# Index at which each class is cut into its train (before) and test (after) part
voltage_cut = int(n_voltage * train_portion)
frequency_cut = int(n_frequency * train_portion)

# Per-class pieces
X_train_voltage, X_test_voltage = X_voltage[:voltage_cut], X_voltage[voltage_cut:]
X_train_frequency, X_test_frequency = X_frequency[:frequency_cut], X_frequency[frequency_cut:]
y_train_voltage, y_test_voltage = y_voltage[:voltage_cut], y_voltage[voltage_cut:]
y_train_frequency, y_test_frequency = y_frequency[:frequency_cut], y_frequency[frequency_cut:]

# Combined datasets (voltage block followed by frequency block)
X_train = np.concatenate([X_train_voltage, X_train_frequency], axis=0)
y_train = np.concatenate([y_train_voltage, y_train_frequency], axis=0)
X_test = np.concatenate([X_test_voltage, X_test_frequency], axis=0)
y_test = np.concatenate([y_test_voltage, y_test_frequency], axis=0)

# Should be (492, 600, 100, 4)
print(X_train.shape)
# Should be (492, 2)
print(y_train.shape)
# Should be (212, 600, 100, 4)
print(X_test.shape)
# Should be (212, 2)
print(y_test.shape)

In [None]:
def build_model():
    """
        Build the CNN classifier (not compiled).

        Three Conv2D(32, 3x3, relu) layers, each followed by 2x2 max pooling,
        then Flatten, a Dense(32, relu) layer and a softmax Dense layer with
        one unit per class (`num_classes`). The first layer declares the
        input shape (600, 100, 4).

        Returns the resulting `Sequential` model.
    """

    layer_stack = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(600, 100, 4)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(32, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ]

    return models.Sequential(layer_stack)

# Fresh, untrained baseline network
model = build_model()

# Loss: categorical cross-entropy (the labels are one-hot encoded)
loss_func = tf.keras.losses.CategoricalCrossentropy()

# Optimizer: Adam with the learning rate below
lr = 0.01
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Attach loss, optimizer and the accuracy metric to the model
model.compile(loss=loss_func, optimizer=optimizer, metrics=['categorical_accuracy'])

# Fit on the training dataset
history = model.fit(x=X_train, y=y_train, batch_size=16, epochs=10)

# Score on the test dataset; evaluate returns the loss followed by the compiled metric
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"The accuracy of the neural network on the test dataset is: {accuracy}.")

## 2. Saving and Loading Model

In [None]:
##-----------------------------------------------------------------------##
##---------------------Students start filling below----------------------##
##-----------------------------------------------------------------------##

"""
    Save trained model to file for future application or further fine-tuning train. 
"""

# Write the trained basic model (architecture and weights) to a single file.
# NOTE(review): the HDF5 (".h5") format is used here; switch the extension to
# ".keras" if that is the format recommended by the installed Keras version.
model_path = "basic_cnn_model.h5"
model.save(model_path)


"""
    Load the model from the file and compile it.
"""

# compile=False skips restoring the saved training configuration, so the
# model is compiled explicitly below with the same settings used for the
# basic model (Adam with `lr`, categorical cross-entropy, categorical accuracy).
model = tf.keras.models.load_model(model_path, compile=False)
model.compile(
    optimizer=tf.keras.optimizers.Adam(lr),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['categorical_accuracy'],
)

##-----------------------------------------------------------------------##
##------------------------------End filling------------------------------##
##-----------------------------------------------------------------------##

## 3. Metrics Assess Performance

In [None]:
# Run the current `model` on the test samples. Because the network ends in a
# softmax layer, each row of y_pred holds one score per class; y_test holds
# the matching one-hot ground-truth labels.
y_pred = model.predict(X_test)

In [None]:
##-----------------------------------------------------------------------##
##---------------------Students start filling below----------------------##
##-----------------------------------------------------------------------##

"""
    Homework 1, only calculate the accuracy of the whole dataset.
    In this task, you are required to calculate the accuracy, precision, recall, and F1-score.
"""

# The sklearn metric functions expect class indices, so convert both the
# one-hot ground truth and the per-class prediction scores with argmax.
y_true_classes = np.argmax(y_test, axis=1)
y_pred_classes = np.argmax(y_pred, axis=1)

# Accuracy
accuracy = accuracy_score(y_true_classes, y_pred_classes)

# Precision (macro: unweighted mean of the per-class values)
precision = precision_score(y_true_classes, y_pred_classes, average='macro')

# Recall
recall = recall_score(y_true_classes, y_pred_classes, average='macro')

# F1-Score
f1 = f1_score(y_true_classes, y_pred_classes, average='macro')

print(f"The accuracy is: {accuracy}.")
print(f"The precision is: {precision}.")
print(f"The recall is: {recall}.")
print(f"The f1 score is: {f1}.")

##-----------------------------------------------------------------------##
##------------------------------End filling------------------------------##
##-----------------------------------------------------------------------##

## 4. Hyper-parameter Tuning

In [None]:
"""
    Separate the Training Dataset to Training (80%) and Validation (20%).
    Perform the hyper-parameter tuning to find the best parameter combination.
    
"""

val_portaton = 0.2

# Per-class sizes of the existing training dataset
n_voltage_train = int(n_voltage * train_portion)
n_frequency_train = int(n_frequency * train_portion)

# Per-class sizes of the validation part
n_voltage_val = int(n_voltage_train * val_portaton)
n_frequency_val = int(n_frequency_train * val_portaton)

# X_train / y_train (built in the train/test split cell) hold the voltage
# training events first, followed by the frequency training events. They are
# reused here as-is instead of being concatenated again, which would only
# duplicate a large array. Within each class block, the leading events go to
# validation and the rest stay in training.
frequency_val_end = n_voltage_train + n_frequency_val

# X_val_hp
X_val_hp_voltage = X_train[:n_voltage_val]
X_val_hp_frequency = X_train[n_voltage_train:frequency_val_end]
X_val_hp = np.concatenate([X_val_hp_voltage, X_val_hp_frequency], axis=0)

# y_val_hp
y_val_hp_voltage = y_train[:n_voltage_val]
y_val_hp_frequency = y_train[n_voltage_train:frequency_val_end]
y_val_hp = np.concatenate([y_val_hp_voltage, y_val_hp_frequency], axis=0)


# X_train_hp
X_train_hp_voltage = X_train[n_voltage_val:n_voltage_train]
X_train_hp_frequency = X_train[frequency_val_end:]
X_train_hp = np.concatenate([X_train_hp_voltage, X_train_hp_frequency], axis=0)

# y_train_hp
y_train_hp_voltage = y_train[n_voltage_val:n_voltage_train]
y_train_hp_frequency = y_train[frequency_val_end:]
y_train_hp = np.concatenate([y_train_hp_voltage, y_train_hp_frequency], axis=0)


# Expected shapes for the 620 voltage / 84 frequency events noted earlier:
# training keeps 434 + 58 = 492 events, of which 86 + 11 = 97 go to validation.
# Should be (395, 600, 100, 4)
print(X_train_hp.shape)
# Should be (395, 2)
print(y_train_hp.shape)
# Should be (97, 600, 100, 4)
print(X_val_hp.shape)
# Should be (97, 2)
print(y_val_hp.shape)

In [None]:
# Re-seed Python's `random`, NumPy and TensorFlow with the same value before
# the tuning experiments start.
seed = 1234
for seed_rng in (random.seed, np.random.seed, tf.random.set_seed):
    seed_rng(seed)

In [None]:
# Hyper-parameters
learning_rates = [0.001, 0.01]
batch_sizes = [16, 32]
training_epochs = [10, 20]

##-----------------------------------------------------------------------##
##---------------------Students start filling below----------------------##
##-----------------------------------------------------------------------##


"""
    Try different combination of the hyper-parameters.
    Train on training dataset, test on validation dataset.
    Choose the best hyper-parameter combination to train the final improved model.
"""


# Best combination seen so far and the validation accuracy it reached.
best_hyperparameter = {"learning_rate": 0, "batch_size": 0, "training_epoch": 0}
best_accuracy_val = 0.0


# Exhaustive grid search over all combinations of the three lists above.
for learning_rate in learning_rates:
    for batch_size in batch_sizes:
        for training_epoch in training_epochs:
            print(f"Current hyper-parameters: learning_rate: {learning_rate}, batch_size: {batch_size}, training_epoch: {training_epoch}.")
            
            """
                Train the model under hyper-parameter setting, and evaluate over validation dataset.
            """
            
            """ Filling code below """
            
            # build model
            model_tuning = build_model()
            # A new optimizer is created for every run so that no optimizer
            # state is shared between hyper-parameter combinations.
            model_tuning.compile(
                optimizer=tf.keras.optimizers.Adam(learning_rate),
                loss=tf.keras.losses.CategoricalCrossentropy(),
                metrics=['categorical_accuracy'],
            )
            # Train the model with the above hyper-parameters
            model_tuning.fit(X_train_hp, y_train_hp, epochs=training_epoch, batch_size=batch_size)
            
            """ End Filling """
    
            # Evaluate the hyper-parameter tuning neural network
            loss_val, accuracy_val = model_tuning.evaluate(X_val_hp, y_val_hp)
            print(f"Current validation accuracy is: {accuracy_val}.\n")
            
            # Strict ">" keeps the first combination that reaches a given accuracy.
            if accuracy_val > best_accuracy_val:
                best_accuracy_val = accuracy_val
                best_hyperparameter["learning_rate"] = learning_rate
                best_hyperparameter["batch_size"] = batch_size
                best_hyperparameter["training_epoch"] = training_epoch
            # Reset Keras' global state before the next model is built.
            tf.keras.backend.clear_session()
            
            
print(f"best validation accuracy is: {best_accuracy_val}")
print(f"best hyper-parameter setting is: {best_hyperparameter}")

##-----------------------------------------------------------------------##
##------------------------------End filling------------------------------##
##-----------------------------------------------------------------------##

## 5. Overfitting Prevention
    Use early stopping to prevent overfitting.

In [None]:
# Set the best hyper-parameter from previous tuning. (You may change them based on previous results)

learning_rate = 0.001
batch_size = 32
training_epoch = 20

##-----------------------------------------------------------------------##
##---------------------Students start filling below----------------------##
##-----------------------------------------------------------------------##


"""
    Use the early stopping to prevent the overfitting.
    Useful resource: https://keras.io/api/callbacks/early_stopping/
"""


# Build model
model_improved = build_model()
# Define the loss function
loss_func = tf.keras.losses.CategoricalCrossentropy()
# Define the optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate)
# Compile the model
model_improved.compile(optimizer=optimizer, loss=loss_func, metrics=['categorical_accuracy'])

""" Filling code below """

# Define the early stopping callback
# Training stops once the validation loss has not improved for `patience`
# consecutive epochs; restore_best_weights=True then rolls the model back to
# the weights of the epoch with the lowest validation loss.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

""" End Filling """

# Train the model with the tuned hyper-parameters and early-stopping.
# validation_data supplies the 'val_loss' value that the callback monitors.
history = model_improved.fit(X_train_hp, y_train_hp, validation_data=(X_val_hp, y_val_hp), 
                             epochs=training_epoch, batch_size=batch_size, callbacks=[early_stopping])

##-----------------------------------------------------------------------##
##------------------------------End filling------------------------------##
##-----------------------------------------------------------------------##

## 6. Compare Performance of Basic and Improved Model

In [None]:
# Accuracy, precision, recall and F1-score of the basic model on the test dataset

y_pred = model.predict(X_test)
# Turn the one-hot labels and the per-class prediction scores into class indices once.
true_classes = y_test.argmax(axis=1)
basic_pred_classes = y_pred.argmax(axis=1)

accuracy = accuracy_score(true_classes, basic_pred_classes)
# 'macro' averaging: unweighted mean of the per-class values
precision = precision_score(true_classes, basic_pred_classes, average='macro')
recall = recall_score(true_classes, basic_pred_classes, average='macro')
f1 = f1_score(true_classes, basic_pred_classes, average='macro')

print("Performance of the basic model.")
print(f"The accuracy is: {accuracy}.")
print(f"The precision is: {precision}.")
print(f"The recall is: {recall}.")
print(f"The f1 score is: {f1}.")

In [None]:
# Accuracy, precision, recall and F1-score of the improved model on the test dataset

y_pred_improved = model_improved.predict(X_test)
# Turn the one-hot labels and the per-class prediction scores into class indices once.
true_classes = y_test.argmax(axis=1)
improved_pred_classes = y_pred_improved.argmax(axis=1)

accuracy = accuracy_score(true_classes, improved_pred_classes)
# 'macro' averaging: unweighted mean of the per-class values
precision = precision_score(true_classes, improved_pred_classes, average='macro')
recall = recall_score(true_classes, improved_pred_classes, average='macro')
f1 = f1_score(true_classes, improved_pred_classes, average='macro')

print("Performance of the improved model.")
print(f"The accuracy is: {accuracy}.")
print(f"The precision is: {precision}.")
print(f"The recall is: {recall}.")
print(f"The f1 score is: {f1}.")