In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Silence TensorFlow's C++ log output (3 = suppress info, warnings and errors).
# NOTE(review): this variable is most reliably honoured when it is set before
# `import tensorflow` runs - consider moving it above the imports.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# seaborn exposes the style switch as the function `set_style`;
# `sns.set.style(...)` raises AttributeError.
sns.set_style('white')

In [None]:
# Fashion-MNIST ships as two (images, labels) pairs: train first, then test.
train_split, test_split = fashion_mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split
print(X_train.shape, X_test.shape)


In [None]:
# Number of distinct class ids present in the training labels.
n_labels = np.unique(y_train).size
n_labels

In [None]:
# Index of the training sample to preview.
pointer = 15

print(F"array pointer = {pointer}")
print(F"x_train[{pointer}] shape: {X_train[pointer].shape}")
print(f"label: {y_train[pointer]}")

# 'accent' is not a registered matplotlib colormap (names are case-sensitive),
# so imshow raised ValueError. The images are single-channel intensities, so a
# sequential grayscale map is used rather than the qualitative 'Accent' map.
plt.imshow(X_train[pointer], cmap='gray')
plt.show()

In [None]:
def check_images(dataset, dataset_name, expected_shape=(28, 28)):
    """
    Validate every image in ``dataset`` and print a summary.

    Each image is checked, in this order, for:
    * being a NumPy array
    * shape (``expected_shape``, 28x28 by default)
    * NaN values (only possible for floating-point arrays)
    * colour channel values (dtype uint8 with values inside 0-255)

    An image is counted as invalid at the first check it fails, and one line
    describing that failure is printed. A final line reports the totals.

    Parameters
    ----------
    dataset : iterable
        Images to validate; iterated once with ``enumerate``.
    dataset_name : str
        Prefix used in every printed message (e.g. "Train").
    expected_shape : tuple of int, optional
        Shape each image must have. Defaults to ``(28, 28)``.

    Returns
    -------
    None
        Results are reported via ``print`` only.
    """
    invalid_count = 0
    valid_count = 0
    expected_shape = tuple(expected_shape)

    for idx, image in enumerate(dataset):
        if not isinstance(image, np.ndarray):
            print(f"{dataset_name} - Index {idx}: Not a valid image array")
            invalid_count += 1
            continue

        if image.shape != expected_shape:
            print(f"{dataset_name} - Index {idx}: Incorrect shape {image.shape}")
            invalid_count += 1
            continue

        # NaN can only occur in floating-point data. Testing it before the
        # uint8 check keeps this branch reachable and gives the more specific
        # message; the dtype guard also avoids np.isnan failing on object arrays.
        if np.issubdtype(image.dtype, np.floating) and np.isnan(image).any():
            print(f"{dataset_name} - Index {idx}: Contains NaN values")
            invalid_count += 1
            continue

        if not (image.dtype == np.uint8 and image.min() >= 0 and image.max() <= 255):
            print(f"{dataset_name} - Index {idx}: Invalid pixel values (Min: {image.min()}, Max: {image.max()})")
            invalid_count += 1
            continue

        valid_count += 1

    print(f"\n{dataset_name}: {valid_count} valid images, {invalid_count} invalid images")
    

In [None]:
print("Checking Images...\n")
# Validate both raw splits with the same checker.
for images, split_name in ((X_train, "Train"), (X_test, "Test")):
    check_images(images, split_name)

In [None]:
# Hold out 20% of the training data for validation; fixed seed for repeatability.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=0
)

# Report the shape of features and labels for every split.
for heading, features, targets in (
    ("* Train set:", X_train, y_train),
    ("* Validation set:", X_val, y_val),
    ("* Test set:", X_test, y_test),
):
    print(heading, features.shape, targets.shape)

In [None]:
# Human-readable class names; the list position is the integer label id.
class_names = [
    "T-shirt/top",
    "Trousers",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

In [None]:
# Empty frame that accumulates one row per (set, label) pair.
frequency_columns = ['Set', 'Label', 'Frequency']
df_freq = pd.DataFrame(columns=frequency_columns)

In [None]:
def count_labels(dataset, dataset_name):
    """
    Count occurrences of each label, print them and record them in ``df_freq``.

    Parameters
    ----------
    dataset : array-like of int
        Integer class ids; each id is used as an index into ``class_names``.
    dataset_name : str
        Name of the split, stored in the 'Set' column and used in the printout.

    Side effects
    ------------
    Rebinds the module-level ``df_freq`` to a frame with one extra row per
    distinct label found in ``dataset`` (columns 'Set', 'Label', 'Frequency').
    """
    global df_freq
    unique, counts = np.unique(dataset, return_counts=True)

    # Collect all rows first and extend the frame once, instead of calling
    # pd.concat for every label.
    records = []
    for label, frequency in zip(unique, counts):
        records.append({
            'Set': dataset_name,  # key must match the 'Set' column exactly
            'Label': class_names[label],
            'Frequency': int(frequency),
        })
        print(f"* {dataset_name} - {class_names[label]}: {frequency} images")

    if not records:
        return

    new_rows = pd.DataFrame(records, columns=['Set', 'Label', 'Frequency'])
    # Replacing an empty frame (rather than concatenating onto it) keeps the
    # 'Frequency' column numeric and avoids pandas' empty-concat deprecation.
    if df_freq.empty:
        df_freq = new_rows
    else:
        df_freq = pd.concat([df_freq, new_rows], ignore_index=True)
        

In [None]:
# Start from an empty table so re-running this cell does not duplicate rows.
df_freq = df_freq.iloc[0:0]

# Same order as the plot title (Train, Validation, Test) and consistent
# capitalisation, since these names become the x-axis tick labels.
count_labels(y_train, "Train")
count_labels(y_val, "Validation")
count_labels(y_test, "Test")

In [None]:
sns.set_style("whitegrid")
plt.figure(figsize=(10, 6))
# The column is called 'Frequency'; the misspelt 'Freqency' does not exist in df_freq.
sns.barplot(data=df_freq, x='Set', y='Frequency', hue='Label')
plt.xticks(rotation=45)
plt.title("Label Frequency Distribution in Train, Validation, and Test Sets")

# savefig does not create missing folders, so make sure the target exists first.
figure_path = "/workspaces/m32895-coursework-2025/outputs/distribution_in_sets.png"
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
plt.savefig(figure_path, bbox_inches='tight', dpi=150)
plt.show()

In [None]:
# Shape of the training images before the channel axis is added.
np.shape(X_train)

In [None]:
# Append a trailing channel axis of size 1 to every split.
X_train = X_train.reshape(X_train.shape[:3] + (1,))
X_val = X_val.reshape(X_val.shape[:3] + (1,))
X_test = X_test.reshape(X_test.shape[:3] + (1,))

print(X_train.shape)

In [None]:
# Largest pixel value before scaling.
np.max(X_train)

In [3]:
# Convert to float32 and divide by the 8-bit maximum so values are scaled by 1/255.
PIXEL_MAX = 255.0
X_train = X_train.astype(np.float32) / PIXEL_MAX
X_val = X_val.astype(np.float32) / PIXEL_MAX
X_test = X_test.astype(np.float32) / PIXEL_MAX


NameError: name 'X_train' is not defined

In [None]:
# Largest pixel value after scaling.
np.max(X_train)

In [None]:
n_labels = 10
# One-hot encode the labels of every split with the same number of classes.
y_train, y_val, y_test = (
    to_categorical(labels, num_classes=n_labels)
    for labels in (y_train, y_val, y_test)
)

In [4]:
# Display y_test to inspect the test labels in their current encoding.
y_test

NameError: name 'y_test' is not defined

In [None]:
def build_tf_model(input_shape, n_labels):
    """
    Build and compile a small convolutional image classifier.

    Architecture: two Conv2D(16 filters, 3x3, ReLU) + MaxPool2D(2x2) stages,
    Flatten, Dense(128, ReLU), Dropout(0.25) and a softmax output layer.
    The model is compiled with the Adam optimizer, categorical cross-entropy
    loss (so labels are expected to be one-hot encoded) and accuracy metric.

    Parameters
    ----------
    input_shape : tuple of int
        Shape of a single input sample, e.g. ``X_train.shape[1:]``.
    n_labels : int
        Number of output classes (units in the softmax layer).

    Returns
    -------
    tensorflow.keras.models.Sequential
        The compiled, untrained model.
    """
    model = Sequential()

    # Declare the input explicitly instead of passing `input_shape` to the
    # first layer.
    model.add(tf.keras.Input(shape=input_shape))

    # The layer class is Conv2D and the keyword is `kernel_size`.
    model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2)))

    model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2)))

    model.add(Flatten())

    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.25))

    model.add(Dense(n_labels, activation='softmax'))
    # `loss` is a lowercase keyword and the loss id is 'categorical_crossentropy'.
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

In [None]:
# The per-sample shape is everything after the batch axis (shape[1:]);
# shape[1] alone is a single integer and is not a valid input shape here.
model = build_tf_model(input_shape=X_train.shape[1:], n_labels=n_labels)
model.summary()

In [None]:
# Stop training once validation loss has gone one epoch without improving.
early_stop = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=1,
    verbose=1,
)

In [None]:
# Build a fresh model so training always starts from newly initialised weights.
model = build_tf_model(input_shape=X_train.shape[1:], n_labels=n_labels)

# The Keras training method is `fit` (`model.fir` raised AttributeError).
model.fit(x=X_train,
          y=y_train,
          epochs=4,
          validation_data=(X_val, y_val),
          verbose=1,
          callbacks=[early_stop]
          )

In [None]:
# Per-epoch metrics recorded by the last `fit` call, one row per epoch.
# (`pd.DatFrame` was a typo for `pd.DataFrame`.)
history = pd.DataFrame(model.history.history)
history.head()

In [None]:
sns.set_style("whitegrid")

# Training vs. validation loss per epoch.
history[['loss','val_loss']].plot(style='.-')
plt.title("Loss")
plt.show()

print("\n")

# Training vs. validation accuracy per epoch; titled like the loss figure so
# each plot can be read on its own.
history[['accuracy','val_accuracy']].plot(style='.-')
plt.title("Accuracy")
plt.show()

In [None]:
# Loss and accuracy on the held-out test split.
model.evaluate(x=X_test, y=y_test)

In [None]:
def confusion_matrix_and_report(X,y,pipeline,Label_map):
    """
    Print the confusion matrix and classification report for one data split.

    Parameters
    ----------
    X : array-like
        Inputs passed to ``pipeline.predict``.
    y : array-like, shape (n_samples, n_classes)
        One-hot encoded true labels; reduced to class ids with ``argmax``.
    pipeline : object
        Fitted model exposing ``predict(X)`` that returns per-class scores.
    Label_map : sequence of str
        Class names, ordered so that position ``i`` names class id ``i``.

    Returns
    -------
    None
        Everything is reported via ``print``.
    """
    label_map = list(Label_map)
    # Pass the label ids explicitly so the matrix and report always cover every
    # class, even when a split happens to miss one.
    label_ids = np.arange(len(label_map))

    prediction = np.argmax(pipeline.predict(X), axis=1)
    actual = np.argmax(y, axis=1)

    print('--- Confusion Matrix ---')
    # scikit-learn puts the true labels on the rows; transpose so rows are
    # predictions and columns are actual classes, matching the axis labels.
    matrix = confusion_matrix(y_true=actual, y_pred=prediction, labels=label_ids).T
    print(pd.DataFrame(matrix,
                       columns=["Actual " + sub for sub in label_map],
                       index=["Prediction " + sub for sub in label_map]
    ))
    print("\n")

    print('--- Classification Report ---')
    print(classification_report(actual, prediction, labels=label_ids, target_names=label_map),"\n")


In [None]:
def clf_performance(X_train, y_train, X_test, y_test, X_val, y_val, pipeline, Label_map):
    """
    Print classification performance for the train, validation and test sets.

    For each split, a heading is printed and ``confusion_matrix_and_report``
    is called with that split's inputs and labels.

    Parameters
    ----------
    X_train, y_train : array-like
        Training inputs and labels.
    X_test, y_test : array-like
        Test inputs and labels.
    X_val, y_val : array-like
        Validation inputs and labels.
    pipeline : object
        Fitted model forwarded to ``confusion_matrix_and_report``.
    Label_map : sequence of str
        Class names forwarded to ``confusion_matrix_and_report``.
    """
    # The parameter is spelt `Label_map`; the body previously referenced the
    # undefined name `label_map`.
    print("#### Train Set ###\n")
    confusion_matrix_and_report(X_train, y_train, pipeline, Label_map)

    print("#### Validation Set ###\n")
    confusion_matrix_and_report(X_val, y_val, pipeline, Label_map)

    print("#### Test Set ###\n")
    confusion_matrix_and_report(X_test, y_test, pipeline, Label_map)

In [None]:
# Report metrics for every split using the trained model.
clf_performance(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    X_val=X_val,
    y_val=y_val,
    pipeline=model,
    Label_map=class_names,
)