In [13]:
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import fashion_mnist

from skimage.transform import resize
from sklearn.metrics import accuracy_score

In [7]:
# Flipped Images with ratio
def random_flipped_img(images, labels, flip_ratio):
    num_flips = int(len(images) * flip_ratio)
    random_choice = np.random.choice(len(images), num_flips, replace=False)
    new_images = []
    new_labels = []
    for i, (img,label) in enumerate(zip(images,labels)):
        if i in random_choice:
            flipped_img = np.flip(img, axis=0)
            new_images.append(flipped_img)
            new_labels.append(1) # 1 for flip
        else:
            new_images.append(img)
            new_labels.append(0) # 0 for not flip
    return new_images, new_labels

# Load dataset
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()
train_images = train_images / 255.0
test_images = test_images / 255.0

# Ratio of train and test (8:2)
train_ratio = 0.8
test_ratio = 0.2
# Total images of train and test set after apply ratio
total_train_num = int(len(train_images)*train_ratio)
total_test_num = int(len(test_images)*test_ratio)

print("Total images of train set:",total_train_num)
print("Total images of test set: ", total_test_num)

# Select random images from train set
random_indices_train = np.random.choice(len(train_images), total_train_num, replace=False)
selected_train_images = train_images[random_indices_train]
selected_train_labels = train_labels[random_indices_train]

# Select random images from test set
random_indices_test = np.random.choice(len(test_images), total_test_num, replace=False)
selected_test_images = test_images[random_indices_test]
selected_test_labels = test_labels[random_indices_test]

# Fiptting ratio
flip_ratio = 0.5
# train - images and labels after flippting randomly
flipped_train_images, flipped_train_labels = random_flipped_img(selected_train_images, selected_train_labels, flip_ratio)

# test - images and labels after flippting randomly
flipped_test_images, flipped_test_labels = random_flipped_img(selected_test_images, selected_test_labels, flip_ratio)


Total images of train set: 48000
Total images of test set:  2000


In [22]:
# Resize images
def resize_images(images):
    resized_images = []
    for image in images:
        resized_image = resize(image, (28, 28))
        if len(resized_image.shape) == 2:
            resized_image = np.expand_dims(resized_image, axis=-1)
        resized_image = np.mean(resized_image, axis=2) 
        resized_images.append(resized_image)
    return np.array(resized_images)

# Convert to numpy arrays and normalize
train_images = np.array(flipped_train_images) / 255.0
train_labels = np.array(flipped_train_labels)
test_images = np.array(flipped_test_images) / 255.0
test_labels = np.array(flipped_test_labels)


train_images_resized = resize_images(train_images)
test_images_resized = resize_images(test_images)
input_shape = train_images_resized[0].shape

# Adjust Values
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate = 0.0005,
    decay_steps=1000,
    decay_rate=0.9,
    staircase=True)

# Define CNN model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=(28,28,1)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(units=128, activation='relu'),
    tf.keras.layers.Dense(units=10, activation='softmax')
])

# Compile model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train model
history = model.fit(
    train_images_resized,
    train_labels,
    epochs=5,
    batch_size=64, # 16 32 64 128 256 vv 
    validation_data=(test_images_resized, test_labels)
)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [23]:
# CNN prediction
predicted_labels = []

for i in range(len(test_images_resized)):
    # Make prediction on a single image
    prediction = model.predict(np.expand_dims(test_images_resized[i], axis=0))
    # Add label after prediction into array
    predicted_label = np.argmax(prediction)
    predicted_labels.append(predicted_label)

print("Predicted labels of images:", predicted_labels)

# Calculate accuracy
accuracy = accuracy_score(test_labels, predicted_labels)
print("Accuracy:", accuracy)


Predicted labels of images: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1,

In [11]:
from sklearn.ensemble import RandomForestClassifier

# Define Random Forest model
# Increase accuracy => increase number of trees
RF = RandomForestClassifier(n_estimators=100, random_state=42)

# Flatten the images for compatibility with non-CNN models
train_images_flattened = tf.reshape(train_images_resized, (train_images_resized.shape[0], -1))
test_images_flattened = tf.reshape(test_images_resized, (test_images_resized.shape[0], -1))
# Train the Random Forest Classifier
RF.fit(train_images_flattened, train_labels)
# Random Forest Prediction -> Use as Replace Solution -> Will be update on future later
predicted_labels_RF = RF.predict(test_images_flattened)
# Calculate accuracy
accuracy = accuracy_score(test_labels, predicted_labels_RF)
print(test_labels)
print(predicted_labels_RF)
print("Accuracy:", accuracy)

[1 1 1 ... 0 0 1]
[1 1 1 ... 0 0 1]
Accuracy: 0.977
