# 3D MNIST - machine learning project

Author: Mateusz Sabuk

## Imports

In [11]:
import h5py
import pickle

import numpy as np
import pandas as pd

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam

import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

## Functions

- create_model(variables) -> model
- train_model(model, variables) -> training_history
- plot_model(model, training_history, model_name): saves plots at ./output/
- save_data_to_file(data, data_name="data"): saves data to file at ./output/
- display_3d_vector(data): shows a single 16x16x16 sample as an interactive 3D scatter plot

In [None]:
def create_model(variables):
    """Build and compile a 3D-convolutional classifier from a settings dict.

    The network takes 16x16x16 single-channel volumes and ends in a
    10-way softmax layer.

    Args:
        variables: dictionary with the keys
            "filters", "kernel_sizes", "pool_sizes" - parallel sequences,
                one entry per convolutional stage (they are zipped
                together, so the shortest one decides the stage count);
            "dense" - number of units in the fully connected layer;
            "dropout" - dropout rate applied after the dense layer;
            "learning_rate" - learning rate handed to the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    conv_stages = zip(variables["filters"], variables["kernel_sizes"], variables["pool_sizes"])

    layer_stack = [Input(shape=(16, 16, 16, 1))]

    # One stage = convolution -> batch normalisation -> max pooling
    for n_filters, conv_kernel, pool_window in conv_stages:
        layer_stack += [
            Conv3D(filters=n_filters, kernel_size=conv_kernel, activation='relu'),
            BatchNormalization(),
            MaxPooling3D(pool_size=pool_window),
        ]

    # Classification head: flatten, dense + dropout (regularization), softmax output
    layer_stack += [
        Flatten(),
        Dense(units=variables["dense"], activation='relu'),
        Dropout(variables["dropout"]),
        Dense(units=10, activation='softmax'),
    ]

    model = Sequential(layer_stack)
    model.compile(
        optimizer=Adam(learning_rate=variables["learning_rate"]),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

def train_model(model, variables):
    """Fit ``model`` on the notebook's training data and return the fit result.

    Reads the notebook-level globals ``X_train``/``y_train`` for training and
    ``X_test``/``y_test`` as validation data, so the data-loading cells must
    have been run first.

    Args:
        model: compiled model exposing ``fit``.
        variables: dictionary providing "epochs" and "batch_size".

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    fit_options = {
        "epochs": variables["epochs"],
        "batch_size": variables["batch_size"],
        "validation_data": (X_test, y_test),
    }
    return model.fit(X_train, y_train, **fit_options)

def plot_model(model, training_history, model_name):
    """Save accuracy, loss and confusion-matrix plots for a trained model.

    Three PNG files are written into ``output/`` (created when missing):
    ``<model_name>_accuracy.png``, ``<model_name>_loss.png`` and
    ``<model_name>_cm.png``.

    Args:
        model: trained model; ``model.predict`` is called on the
            notebook-level global ``X_test``.
        training_history: fit result whose ``history`` dictionary holds
            the "accuracy", "val_accuracy", "loss" and "val_loss" series.
        model_name: prefix used for the output file names.
    """
    import os  # local import keeps this cell independent of the import cell

    # savefig does not create missing directories; without this the very
    # first save fails with FileNotFoundError on a fresh checkout.
    os.makedirs('output', exist_ok=True)

    history = training_history.history

    # Training vs. validation curve for each tracked metric
    for metric in ('accuracy', 'loss'):
        plt.figure(figsize=(8, 6))
        plt.plot(history[metric])
        plt.plot(history[f'val_{metric}'])
        plt.title(f'Model {metric}')
        plt.ylabel(metric.capitalize())
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Test'])
        plt.savefig(f'output/{model_name}_{metric}.png', bbox_inches='tight')

    # Predict the classes on the test set (index of the highest class score)
    y_pred = model.predict(X_test)
    y_pred_classes = y_pred.argmax(axis=1)
    # Compute the confusion matrix against the true test labels
    cm = confusion_matrix(y_test, y_pred_classes)
    # Display and save the confusion matrix
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=range(10))
    disp.plot(cmap=plt.cm.Reds)
    plt.savefig(f'output/{model_name}_cm.png', bbox_inches='tight')

def save_data_to_file(data, data_name="data"):
    """Pickle ``data`` to ``output/<data_name>.pkl``.

    The ``output`` directory is created when it does not exist yet, so the
    function also works on a fresh checkout. An existing file with the same
    name is overwritten.

    Args:
        data: any picklable object.
        data_name: file name without extension (defaults to "data").
    """
    import os  # local import keeps this cell independent of the import cell

    # open(..., 'wb') does not create missing parent directories
    os.makedirs('output', exist_ok=True)

    with open(f'output/{data_name}.pkl', 'wb') as f:
        pickle.dump(data, f)

def display_3d_vector(data):
    """Show one sample as an interactive 3D scatter plot.

    ``data`` is reshaped to a 16x16x16 grid; every grid cell becomes one
    marker located at its (x, y, z) index. Each marker's colour is an
    ``rgba(...)`` string whose fourth component is the cell's value.

    Args:
        data: array that can be reshaped to (16, 16, 16).
    """
    volume = data.reshape(16, 16, 16)

    # Index grids flattened in the same order as the values themselves
    xs, ys, zs = (axis_grid.flatten() for axis_grid in np.indices(volume.shape))
    point_colors = [f"rgba(20, 201, 129,{voxel})" for voxel in volume.flatten()]

    marker_style = dict(
        size=4,
        color=point_colors,
        colorbar=dict(title='Value'),
    )
    fig = go.Figure(data=go.Scatter3d(x=xs, y=ys, z=zs, mode='markers', marker=marker_style))

    axis_titles = dict(
        xaxis_title='X Axis',
        yaxis_title='Y Axis',
        zaxis_title='Z Axis',
    )
    fig.update_layout(scene=axis_titles)

    fig.show()

### Unused ones

I wanted to save the models with checkpoints, but that was not really part of the project.

In [None]:
# def show_available_models():
#     matching_filenames = []
#     for filename in os.listdir('models/'):
#         if filename.endswith(f".h5"):
#             matching_filenames.append(filename)

#     for filename in matching_filenames:
#         # Access the metadata
#         with h5py.File(filename, 'r') as f:
#             if 'metadata' in f:
#                 metadata = f['metadata'].attrs
#                 variables = metadata.get('variables', '')
#                 print(f'File: {filename}')
#                 print(f'Used variables: {variables}')


# def get_model_from_file(model_name):
#     return load_model(f"models/{model_name}.h5")


# def save_model_to_file(model_name, variables):
#     # Load the best model
#     model = load_model(f'models/{model_name}.keras')

#     # Save the model again with additional metadata
#     model_save_path = f'models/{model_name}.h5'
#     model.save(model_save_path)

#     # Open the saved model file to add metadata
#     with h5py.File(model_save_path, 'a') as f:
#         # Create a group for metadata
#         metadata_group = f.create_group('metadata')
#         # Add descriptions or any other metadata
#         metadata_group.attrs['variables'] = variables

## Get data from h5 file

In [None]:
# Read the four arrays stored in the HDF5 file fully into memory
# (the [:] slice copies each dataset out before the file is closed).
with h5py.File("data/full_dataset_vectors.h5", "r") as hf:
    X_train_vec, y_train, X_test_vec, y_test = (
        hf[dataset_key][:]
        for dataset_key in ("X_train", "y_train", "X_test", "y_test")
    )

# Cell output: shapes of the loaded arrays
X_train_vec.shape, y_train.shape, X_test_vec.shape, y_test.shape

### Reshape from 4096-element vectors to 16x16x16 volumes with 1 channel

In [None]:
# Turn every flat vector into a 16x16x16 volume with a trailing channel axis;
# -1 lets NumPy work out the number of samples.
X_train, X_test = (
    flat_vectors.reshape(-1, 16, 16, 16, 1)
    for flat_vectors in (X_train_vec, X_test_vec)
)

# Cell output: shapes after reshaping
X_train.shape, X_test.shape

## Display a single vector of data

In [30]:
# Render the test sample at index 2 as an interactive 3D scatter plot
display_3d_vector(X_test[2])

## Model variables

Example of a single `variables` dictionary:
```python3
model_variables = {
    # Convolutional layers
    "filters": [32, 64, 128],
    "kernel_sizes": [2, 2, 2],
    "pool_sizes": [2, 2, 2],
    # Rest of the model
    "dense": 512,
    "dropout": 0.3,
    "learning_rate": 0.01,
    # Model training
    "epochs": 20,
    "batch_size": 32,
}
```

In [None]:
# Model configurations to try. They differ only in the filter counts of the
# three convolutional layers; every other setting is shared. The dictionary
# literal sits inside the comprehension so each configuration gets its own
# fresh list objects.
variables_array = [
    {
        # Convolutional layers
        "filters": conv_filters,
        "kernel_sizes": [2, 2, 2],
        "pool_sizes": [2, 2, 2],
        # Rest of the model
        "dense": 512,
        "dropout": 0.3,
        "learning_rate": 0.01,
        # Model training
        "epochs": 20,
        "batch_size": 32,
    }
    for conv_filters in (
        [32, 64, 128],
        [128, 64, 32],
        [64, 64, 64],
        [32, 32, 32],
    )
]

In [None]:
# Train every configuration in turn. A failure in one configuration is
# recorded as a message and does not stop the remaining runs; the collected
# results are re-saved after each configuration so finished runs are kept.
data = []
for i, variables in enumerate(variables_array):
    model_tag = f"{i:02d}"  # zero-padded index used in file names and messages
    try:
        model = create_model(variables)
        training_history = train_model(model, variables)
        plot_model(model, training_history, model_tag)
        data.append((variables, training_history))
    except Exception as e:
        print(e)
        data.append(f"Model nr {model_tag} error: {e}")
    save_data_to_file(data)