In [None]:
# IMPORTS
import tensorflow as tf 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# PREPROCESSING
# Builds the class-name list and the train / validation / test image iterators
# that the later cells consume.

# Define Image/Save Directories
# NOTE(review): absolute, machine-specific path; consider making this configurable.
common_path = "/Users/michaelpatsais/Documents/Uni_work/Astrophysics/machine_learning/deep-learning-2024/Project/msl-images/"
# save_directory = "" # for saving plots

# Every image is rescaled by 1/255 as it is loaded.
idg = ImageDataGenerator(rescale = 1./255.)

# LABELS DF
image_classes = np.loadtxt(common_path + "msl_synset_words-indexed.txt", dtype = str, delimiter = ",")
labels_df = pd.DataFrame(image_classes, columns = ["filename", "class"])

# Unique, whitespace-stripped class names, in order of first appearance in the file.
labels = [name.strip() for name in labels_df["class"].unique().tolist()]
print(labels)

# One string identifier ("0", "1", ...) per class name. The list is passed as
# `classes` to every iterator below so that all three splits share one ordering.
classes = [str(index) for index in range(len(labels))]
print(classes)


def load_split(list_file, shuffle):
    """Load one dataset split and wrap it in a Keras image iterator.

    Parameters
    ----------
    list_file : str
        Name of a space-delimited text file inside `common_path`; each row is
        read as a ("filename", "class") pair.
    shuffle : bool
        Forwarded to `flow_from_dataframe`; whether the iterator shuffles the
        sample order.

    Returns
    -------
    tuple
        (raw string array, DataFrame with "filename"/"class" columns, iterator).
    """
    records = np.loadtxt(common_path + list_file, dtype = str, delimiter = " ")
    frame = pd.DataFrame(records, columns = ["filename", "class"])
    flow = idg.flow_from_dataframe(
        dataframe = frame,
        directory = common_path,
        classes = classes,
        batch_size = 100,
        shuffle = shuffle,
    )
    return records, frame, flow


# training dataset (shuffled, as before)
train, train_df, train_ds = load_split("train-calibrated-shuffled.txt", shuffle = True)

# VAL dataset (shuffled, as before)
val, val_df, val_ds = load_split("val-calibrated-shuffled.txt", shuffle = True)

# TEST dataset
# shuffle=False keeps the iterator in file order, so the rows returned by
# model.predict(test_ds) correspond to the images/labels read back from
# test_ds batch by batch.
test, test_df, test_ds = load_split("test-calibrated-shuffled.txt", shuffle = False)

['apxs', 'apxs cal target', 'chemcam cal target', 'chemin minlet open', 'drill', 'drill holes', 'drt front', 'drt side', 'ground', 'horizon', 'inlet', 'mahli', 'mahli cal target', 'mastcam', 'mastcam cal target', 'observation tray', 'portion box', 'portion tube', 'portion tube opening', 'rems uv sensor', 'rover rear deck', 'scoop', 'sun', 'turret', 'wheel']
['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24']
Found 3746 validated image filenames belonging to 25 classes.
Found 1640 validated image filenames belonging to 25 classes.
Found 1305 validated image filenames belonging to 25 classes.


In [None]:
#  MODEL CONFIGURATION
# Defines, compiles and trains a small CNN classifier on `train_ds`,
# validating on `val_ds` after each epoch.
batch_size = 100  # kept for reference; the iterators already yield batches themselves
epochs = 10
dropout_val = 0.4

# image dimensions
# Load a single batch once and read both the image shape and the width of the
# label vectors from it, instead of decoding a batch for every dimension.
sample_images, sample_labels = train_ds[0]  # (image batch, label batch)
xpix, ypix, zpix = sample_images.shape[1:]
num_classes = sample_labels.shape[-1]  # one output unit per label column

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(64, (5, 5), activation='relu', input_shape=(xpix, ypix, zpix)),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),

    tf.keras.layers.Flatten(),

    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(dropout_val),
    tf.keras.layers.Dense(32, activation='relu'),

    # Softmax output sized from the data rather than a hard-coded class count.
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.summary()

loss_fn = tf.keras.losses.CategoricalCrossentropy()

model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# `batch_size` is intentionally not passed: the Keras documentation says not to
# specify it when the input is a generator / Sequence, because such inputs
# produce their own batches.
history  = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

In [None]:
# Model Accuracy & Loss
# Prints and plots the per-epoch training and validation metrics recorded in
# `history` by model.fit.


def plot_history_metric(metric, legend_loc):
    """Plot the training and validation curves of one metric from `history`.

    Parameters
    ----------
    metric : str
        Key in `history.history` (e.g. 'accuracy' or 'loss'); the validation
        curve is read from the matching 'val_' + metric key.
    legend_loc : str
        Matplotlib legend location string.
    """
    fig, ax = plt.subplots()
    ax.plot(history.history[metric])
    ax.plot(history.history['val_' + metric])
    ax.set_title('model ' + metric)
    ax.set_ylabel(metric)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'validate'], loc=legend_loc)
    plt.show()


# The 'val_*' entries come from the validation data passed to model.fit, so
# they are labelled "val" here (not "test").
print("  N(Epochs)        = ", epochs)
print("  accuracy (train) = ", history.history['accuracy'])
print("  accuracy (val)   = ", history.history['val_accuracy'])

plot_history_metric('accuracy', 'lower right')
# plt.savefig(save_directory + "/Multi_Classification_model_accuracy.png")

print("  loss (train)     = ", history.history['loss'])
print("  loss (val)       = ", history.history['val_loss'])

# summarize history for loss
plot_history_metric('loss', 'upper right')
# plt.savefig(save_directory + "/Multi_Classification_model_loss")

In [None]:
# Plotting image of each Classification
# Shows, for each of the 25 classes, the first test image whose true label is
# that class, titled with the predicted (P) and true (T) class names.

# Read every test batch exactly once so images and labels come from the same
# pass over the iterator and are guaranteed to be paired.
test_batches = [test_ds[batch] for batch in range(len(test_ds))]
all_images = np.concatenate([images for images, _ in test_batches])
all_images_data = np.concatenate([targets for _, targets in test_batches])
del test_batches  # the concatenated copies are all that is needed from here on

# Predict on the collected array (not on the iterator) so that row i of
# `predictions` always belongs to all_images[i], whatever order the iterator
# would produce on another pass.
predictions = model.predict(all_images)

true_classes = np.argmax(all_images_data, axis=1)
predicted_classes = np.argmax(predictions, axis=1)

fig, axes = plt.subplots(5, 5, figsize = (20,20))

for i in range(25): # a counter that represents the classification we want to print
    ax = axes[i//5, i%5] # calculating the subplot to print to
    matches = np.flatnonzero(true_classes == i) # indices of test images with true class i
    if matches.size == 0:
        continue # no test image of this class: the subplot is left empty
    first = matches[0]
    ax.set_title(f"P:{labels[predicted_classes[first]]},T:{labels[i]}")
    ax.imshow(all_images[first])

plt.tight_layout()
# plt.savefig(save_directory + "/2b.png")
plt.show()

In [None]:
# evaluating accuracy of predictions
# Counts correct / incorrect predictions per true class and plots them as a
# stacked bar chart, followed by the per-class accuracy in percent.

# Checking for imbalance in number of images per class in train and test data
# class_amounts_train = train_df["class"].value_counts().to_dict()
# class_amounts_test = test_df["class"].value_counts().to_dict()
# print(class_amounts_train)
# print(class_amounts_test)

# Class index of each image: position of the largest entry in its label row
# and in its prediction row.
true_classes = np.argmax(all_images_data, axis=1)
pred_classes = np.argmax(predictions, axis=1)
n_classes = all_images_data.shape[1]  # one label column per class

# Tally hits and misses per true class; minlength keeps a slot for classes
# that never occur in the test labels.
is_correct = true_classes == pred_classes
correct = np.bincount(true_classes[is_correct], minlength=n_classes)
incorrect = np.bincount(true_classes[~is_correct], minlength=n_classes)

x_axis = np.arange(n_classes)

# plotting the correct/incorrect amounts per classification as a bar chart
plt.figure()
plt.bar(x_axis, correct, color = "blue")
plt.bar(x_axis, incorrect, bottom = correct, color = "red")
plt.title('model accuracy bar chart')
plt.ylabel('No. of images')
plt.xlabel('Classification')
plt.xticks(x_axis)
plt.legend(['correct', 'incorrect'], loc='upper left')
# plt.savefig(save_directory + "/2c_accuracy_bar.png")
plt.show()

# calculating accuracy percentage per class
# Scaled to 0-100 so the values match the "percentage" title and axis label.
# Classes with no test images are reported as 0 rather than dividing by zero.
totals = correct + incorrect
accuracy_perc = np.divide(
    100.0 * correct,
    totals,
    out=np.zeros(n_classes, dtype=float),
    where=totals > 0,
)

plt.plot(x_axis, accuracy_perc)
plt.title('model accuracy percentage')
plt.ylabel('Accuracy Percentage')
plt.xlabel('Classification')
plt.xticks(x_axis)
# plt.savefig(save_directory + "/2c_accuracy_perc.png")
plt.show()