In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
plt.switch_backend('module://ipykernel.pylab.backend_inline') #used when working with the matplotlib library in Python within a Jupyter Notebook environment to save pgf plots effectively.
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Pre-processing data
#
# Loads the raw features/targets, scales the features to [0, 1], and turns
# each distinct target row into a single integer class label (plus a one-hot
# version of that label) for the classification models below.

# Load data from .npy files
input_data = np.load('group5_input_data.npy')
target_data = np.load('group5_target_data.npy')
print("input data: ", input_data)
print("target data: ", target_data[:20])

print("input data shape: ", input_data.shape)
print("target data shape: ", target_data.shape)

# Fail early if features and targets are misaligned instead of hitting an
# opaque error later during the train/test split.
assert len(input_data) == len(target_data), "input/target row counts differ"

# Normalize data to improve convergence (per-column min-max scaling).
# NOTE(review): the scaler is fit on the full dataset, i.e. before the
# train/validation/test split performed in the training cells, so the
# min/max statistics also see held-out rows. Presumably harmless for this
# data, but confirm whether it should be fit on the training split only.
scaler = MinMaxScaler()
normalized_input_data = scaler.fit_transform(input_data)
print("Normalized data: ", normalized_input_data)

# Observe unique combinations.
# return_inverse gives, for every row of target_data, the index of its
# matching row in unique_combinations -- which is exactly the integer label.
unique_combinations, inverse_indices = np.unique(target_data, axis=0, return_inverse=True)
print("Unique combinations:")
print(unique_combinations)

# Assign integer labels: combination (as a tuple) -> class index
label_map = {tuple(combo): i for i, combo in enumerate(unique_combinations)}

# Encode target data. This is equivalent to looking every row up in
# label_map, but vectorised instead of a Python loop over all rows.
# reshape(-1) guards against NumPy versions that return a 2-D inverse
# when `axis` is given.
encoded_labels = inverse_indices.reshape(-1)

print("Encoded Labels:")
print(encoded_labels[:20]) #Print first 20 to verify

# Convert to one-hot encoding; pass the class count explicitly so the width
# always matches the number of unique combinations.
one_hot_labels = to_categorical(encoded_labels, num_classes=len(unique_combinations))
print("One-Hot Encoded Labels:")
print(one_hot_labels[:20]) #Print first 20 to verify

# Print label-to-combination mapping just for further verification
reverse_label_map = {v: k for k, v in label_map.items()}
print("Label to Combination Mapping:")
for label, combo in reverse_label_map.items():
    print(f"Label {label}: {combo}")





input data:  [[ -71.21212121   92.42424242    1.07992247]
 [ -65.15151515    4.54545455    1.76714587]
 [ -50.         -137.87878788    0.78539816]
 ...
 [ -83.33333333  -71.21212121    2.84706834]
 [-131.81818182  -31.81818182    4.90873852]
 [  31.81818182  134.84848485    2.94524311]]
target data:  [[0. 1. 1.]
 [0. 0. 1.]
 [0. 1. 1.]
 [1. 0. 1.]
 [0. 1. 1.]
 [0. 0. 1.]
 [1. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 1.]
 [0. 0. 1.]
 [1. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 1. 0.]
 [1. 0. 1.]
 [1. 1. 0.]
 [1. 1. 0.]]
input data shape:  (498420, 3)
target data shape:  (498420, 3)
Normalized data:  [[0.25773196 0.81443299 0.171875  ]
 [0.27835052 0.51546392 0.28125   ]
 [0.32989691 0.03092784 0.125     ]
 ...
 [0.21649485 0.25773196 0.453125  ]
 [0.05154639 0.39175258 0.78125   ]
 [0.60824742 0.95876289 0.46875   ]]
Unique combinations:
[[0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 1.]
 [1. 0. 0.]
 [1. 0. 1.]
 [1. 1. 0.]]
Encoded Labels:
[2 0 2 4 2 0 4 3 0 1 2 0 5 3 3 0 5 4 5 5]
O

In [2]:
# For categorical cross entropy using one hot encoding
#
# Trains a small fully-connected classifier on the one-hot labels, reports
# test loss/accuracy, and saves loss, accuracy and confusion-matrix figures
# as .pgf files.

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable, matching the already-seeded data split.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Split data into training, validation, and testing sets (80% / 10% / 10%)
X_train, X_temp, y_train, y_temp = train_test_split(normalized_input_data, one_hot_labels, test_size=0.2, random_state=SEED)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=SEED)

# Derive the output width from the labels rather than hard-coding it, so the
# model stays valid if the number of target combinations changes.
num_classes = one_hot_labels.shape[1]

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(3,)),  # Input layer
    tf.keras.layers.Dense(12, activation='relu'),  # Hidden layer
    tf.keras.layers.Dense(12, activation='relu'),   # Hidden layer
    tf.keras.layers.Dense(12, activation='relu'),   # Hidden layer
    tf.keras.layers.Dense(num_classes, activation='softmax')  # Output layer with softmax activation
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model using training data and in parallel validating using validation data
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=151, batch_size=50, verbose=2, shuffle=True)

# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test loss: {loss:.4f}, Test accuracy: {accuracy:.4f}")

# Training vs. validation loss per epoch
plt.figure(figsize=(8, 6))
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['loss', 'val_loss'])
plt.title('loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.savefig('one_hot_loss.pgf', format='pgf') # Save plot for report

# Training vs. validation accuracy per epoch
plt.figure(figsize=(8, 6))
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.legend(['accuracy', 'val_accuracy'])
plt.title('Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.savefig('one_hot_accuracy.pgf', format='pgf')# Save plot for report

# Confusion matrix - To validate training
# Note: this is computed on the TRAINING split, so it shows how well the
# model fits the data it was trained on, not how well it generalises.
# model.predict gives the predicted probabilities for each class
y_pred_prob = model.predict(X_train)
y_pred = np.argmax(y_pred_prob, axis=1)
y_true = np.argmax(y_train, axis=1)  # one-hot rows back to integer labels

# Create a confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.savefig('one_hot_conf.pgf', format='pgf')# Save plot for report
plt.show()

Epoch 1/151
7975/7975 - 17s - loss: 0.3731 - accuracy: 0.8724 - val_loss: 0.1750 - val_accuracy: 0.9379 - 17s/epoch - 2ms/step
Epoch 2/151
7975/7975 - 15s - loss: 0.1545 - accuracy: 0.9443 - val_loss: 0.1392 - val_accuracy: 0.9486 - 15s/epoch - 2ms/step
Epoch 3/151
7975/7975 - 16s - loss: 0.1327 - accuracy: 0.9513 - val_loss: 0.1179 - val_accuracy: 0.9577 - 16s/epoch - 2ms/step
Epoch 4/151
7975/7975 - 15s - loss: 0.1161 - accuracy: 0.9573 - val_loss: 0.1058 - val_accuracy: 0.9605 - 15s/epoch - 2ms/step
Epoch 5/151
7975/7975 - 16s - loss: 0.1079 - accuracy: 0.9598 - val_loss: 0.0983 - val_accuracy: 0.9638 - 16s/epoch - 2ms/step
Epoch 6/151
7975/7975 - 15s - loss: 0.1026 - accuracy: 0.9612 - val_loss: 0.1259 - val_accuracy: 0.9480 - 15s/epoch - 2ms/step
Epoch 7/151
7975/7975 - 14s - loss: 0.0989 - accuracy: 0.9624 - val_loss: 0.0996 - val_accuracy: 0.9602 - 14s/epoch - 2ms/step
Epoch 8/151
7975/7975 - 15s - loss: 0.0972 - accuracy: 0.9628 - val_loss: 0.0898 - val_accuracy: 0.9669 - 15s/e

7975/7975 - 14s - loss: 0.0579 - accuracy: 0.9762 - val_loss: 0.0537 - val_accuracy: 0.9778 - 14s/epoch - 2ms/step
Epoch 66/151
7975/7975 - 15s - loss: 0.0573 - accuracy: 0.9764 - val_loss: 0.0505 - val_accuracy: 0.9805 - 15s/epoch - 2ms/step
Epoch 67/151
7975/7975 - 15s - loss: 0.0561 - accuracy: 0.9770 - val_loss: 0.0512 - val_accuracy: 0.9803 - 15s/epoch - 2ms/step
Epoch 68/151
7975/7975 - 14s - loss: 0.0558 - accuracy: 0.9771 - val_loss: 0.0573 - val_accuracy: 0.9758 - 14s/epoch - 2ms/step
Epoch 69/151
7975/7975 - 14s - loss: 0.0552 - accuracy: 0.9774 - val_loss: 0.0477 - val_accuracy: 0.9802 - 14s/epoch - 2ms/step
Epoch 70/151
7975/7975 - 14s - loss: 0.0544 - accuracy: 0.9776 - val_loss: 0.0560 - val_accuracy: 0.9771 - 14s/epoch - 2ms/step
Epoch 71/151
7975/7975 - 14s - loss: 0.0539 - accuracy: 0.9779 - val_loss: 0.0836 - val_accuracy: 0.9632 - 14s/epoch - 2ms/step
Epoch 72/151
7975/7975 - 14s - loss: 0.0534 - accuracy: 0.9783 - val_loss: 0.0519 - val_accuracy: 0.9779 - 14s/epoch 

KeyboardInterrupt: 

In [None]:
# For sparse categorical cross entropy -
#
# Same network as the one-hot experiment, but trained directly on the
# integer class labels. Reports test loss/accuracy and saves loss, accuracy
# and confusion-matrix figures as .pgf files.

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable, matching the already-seeded data split.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Split data into training, validation, and testing sets (80% / 10% / 10%)
X_train, X_temp, y_train, y_temp = train_test_split(normalized_input_data, encoded_labels, test_size=0.2, random_state=SEED)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=SEED)

# Labels are 0..K-1, so K = max + 1. Derived from the full label array (not
# just y_train) so every class gets an output unit even if the split happens
# to miss one.
num_classes = int(encoded_labels.max()) + 1

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(3,)),  # Input layer
    tf.keras.layers.Dense(12, activation='relu'),  # Hidden layer
    tf.keras.layers.Dense(12, activation='relu'),   # Hidden layer
    tf.keras.layers.Dense(12, activation='relu'),   # Hidden layer
    tf.keras.layers.Dense(num_classes, activation='softmax')  # Output layer with softmax activation
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history_sparse = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=151, batch_size=50, verbose=2, shuffle=True)

# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test loss: {loss:.4f}, Test accuracy: {accuracy:.4f}")

# Training vs. validation loss per epoch
plt.figure(figsize=(8, 6))
plt.plot(history_sparse.history['loss'])
plt.plot(history_sparse.history['val_loss'])
plt.legend(['loss', 'val_loss'])
plt.title('loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.savefig('sparse_loss.pgf', format='pgf')# Save plot for report

# Training vs. validation accuracy per epoch
plt.figure(figsize=(8, 6))
plt.plot(history_sparse.history['accuracy'])
plt.plot(history_sparse.history['val_accuracy'])
plt.legend(['accuracy', 'val_accuracy'])
plt.title('Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.savefig('sparse_accuracy.pgf', format='pgf')# Save plot for report

# Confusion matrix - To validate training
# Note: this is computed on the TRAINING split, so it shows how well the
# model fits the data it was trained on, not how well it generalises.
# model.predict gives the predicted probabilities for each class
y_pred_prob = model.predict(X_train)
y_pred = np.argmax(y_pred_prob, axis=1)
y_true = y_train  # already integer labels, no argmax needed

# Create a confusion matrix
conf_matrix = confusion_matrix(y_true, y_pred)

# Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.savefig('sparse_conf.pgf', format='pgf')# Save plot for report
plt.show()

Epoch 1/151
7975/7975 - 19s - loss: 0.4195 - accuracy: 0.8553 - val_loss: 0.1984 - val_accuracy: 0.9300 - 19s/epoch - 2ms/step
Epoch 2/151
7975/7975 - 15s - loss: 0.1811 - accuracy: 0.9342 - val_loss: 0.1668 - val_accuracy: 0.9439 - 15s/epoch - 2ms/step
Epoch 3/151
7975/7975 - 14s - loss: 0.1622 - accuracy: 0.9397 - val_loss: 0.1584 - val_accuracy: 0.9394 - 14s/epoch - 2ms/step
Epoch 4/151
7975/7975 - 15s - loss: 0.1534 - accuracy: 0.9417 - val_loss: 0.1486 - val_accuracy: 0.9434 - 15s/epoch - 2ms/step
Epoch 5/151
7975/7975 - 15s - loss: 0.1477 - accuracy: 0.9425 - val_loss: 0.1429 - val_accuracy: 0.9459 - 15s/epoch - 2ms/step
Epoch 6/151
7975/7975 - 14s - loss: 0.1437 - accuracy: 0.9432 - val_loss: 0.1373 - val_accuracy: 0.9462 - 14s/epoch - 2ms/step
Epoch 7/151
7975/7975 - 14s - loss: 0.1396 - accuracy: 0.9439 - val_loss: 0.1342 - val_accuracy: 0.9489 - 14s/epoch - 2ms/step
Epoch 8/151
7975/7975 - 14s - loss: 0.1363 - accuracy: 0.9444 - val_loss: 0.1385 - val_accuracy: 0.9449 - 14s/e