# Group 1
* 2602052613 - Jovian Yanto 
* 2602068605 - Fernaldy Ferdinand 
* 2602058112 - Jessica Liviana Widiatmo
* 2602066934 - Jocelyn

### Loading basic Libraries

In [None]:
import os
from PIL import Image
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import warnings
from sklearn.metrics import accuracy_score, f1_score
warnings.filterwarnings('ignore')

## Checking the Data for Anomalies

In [None]:
# Root folder of the batik motif dataset; later cells read class sub-folders
# such as 'batik-betawi' from this location.
path = '/kaggle/input/motif-batik'

In [None]:
directory = '/kaggle/input/motif-batik/batik-betawi'  # change for each class

# Folder name of the class being checked; only used in the log messages below.
# It is derived here so this cell does not rely on a variable created by a
# later cell.
class_name = os.path.basename(os.path.normpath(directory))

# Iterate through the files in the directory
for filename in os.listdir(directory):
    # Join the directory path with the filename
    filepath = os.path.join(directory, filename)

    # Only regular files can be images; leave sub-directories untouched.
    if not os.path.isfile(filepath):
        continue

    # Check if the file is a valid image file
    try:
        with Image.open(filepath) as img:
            # If the image format is not one of the supported formats, convert it
            if img.format not in ['JPEG', 'PNG', 'GIF', 'BMP']:
                print(f"Converting {filename} to PNG...")
                img.save(filepath, 'PNG')  # Convert to PNG format (path/extension unchanged)
    except Exception as e:
        # The file could not be opened or converted: report it and try to delete it.
        print(f"Error processing {filename}: {e}")
        try:
            os.remove(filepath)
        except OSError as remove_error:
            # e.g. a read-only location; keep scanning the remaining files.
            print(f"Could not remove {filename}: {remove_error}")
        else:
            print(f"{filename} removed from {class_name}")

In [None]:
# Show the entries directly under the dataset root.
os.listdir(path)

# Loading Dataset

In [None]:
# Class folder names; a label index is mapped back to its name by position in this list.
classes = ['batik-keraton', 'batik-kawung', 'batik-betawi']

In [None]:
# Build a tf.data pipeline of (images, labels) from the class folders,
# resizing every image to 224x224 and restricting it to the listed classes.
data = tf.keras.utils.image_dataset_from_directory(
    directory=path,
    image_size=(224, 224),
    class_names=classes,
)

# NumPy iterator over the pipeline, used to pull out sample batches for inspection.
data_iterator = data.as_numpy_iterator()

In [None]:
# Fetch one batch from the iterator; re-run this cell to advance to the next batch.
batch = next(data_iterator)

In [None]:
# Second element of the batch: the class indices (used below to index `classes`).
batch[1]

In [None]:
# Preview the first eight images of the batch in a 2x4 grid, titled with their class names.
fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(20, 10))
for idx, panel in enumerate(ax.flat):  # row-major order: idx == row * 4 + col
    # Pixel values are not scaled yet, so cast to int for imshow.
    panel.imshow(batch[0][idx].astype(int))
    class_idx = batch[1][idx]        # class index of this image
    class_name = classes[class_idx]  # class index -> class name
    panel.title.set_text(class_name)

## Data Preprocessing - Scaling

In [None]:
# Largest pixel value in the current batch, to see the value range before scaling.
batch[0].max()

In [None]:
# Number of images in the current batch.
len(batch[0])

In [None]:
# Divide the inspected batch by 255 (quick check of the scaling on one batch).
scaled = batch[0] / 255

# Apply the same division to every batch of the pipeline; labels pass through unchanged.
scaled_data = data.map(lambda images, labels: (images / 255, labels))

# Re-create the inspection iterator on the scaled pipeline and fetch one batch.
data_iterator = scaled_data.as_numpy_iterator()
batch = next(data_iterator)

In [None]:
# Preview the first eight scaled images in a 2x4 grid, titled with their class names.
fig, ax = plt.subplots(nrows=2, ncols=4, figsize=(20, 10))
for idx, panel in enumerate(ax.flat):  # row-major order: idx == row * 4 + col
    # Values were divided by 255 above, so they are shown as floats (no int cast).
    panel.imshow(batch[0][idx])
    class_idx = batch[1][idx]
    class_name = classes[class_idx]
    panel.title.set_text(class_name)

In [None]:
# Length of the scaled dataset; the split sizes below are computed from this value.
len(scaled_data)

# Data Splitting

In [None]:
# Fix ONE shuffled order for the whole notebook. By default `shuffle` draws a
# new order on every iteration, so the take/skip slices below would select
# different elements on each pass and the train/val/test splits would overlap.
# `cache()` additionally keeps the elements produced by the first pass, so the
# content of each element cannot change between passes either.
# NOTE(review): cache() holds the scaled dataset in memory — confirm it fits.
num_batches = len(scaled_data)
randomized_data = scaled_data.cache().shuffle(
    buffer_size=num_batches,
    seed=42,
    reshuffle_each_iteration=False,
)

# 60% / 20% / remainder split, counted in dataset elements. The test split
# takes whatever is left so nothing is silently dropped by integer rounding.
train_size = int(num_batches * .6)
val_size = int(num_batches * .2)
test_size = num_batches - train_size - val_size

train = randomized_data.take(train_size)
val = randomized_data.skip(train_size).take(val_size)
test = randomized_data.skip(train_size + val_size).take(test_size)

In [None]:
# Sizes of the train, test and validation splits (in that order), one per line.
print(len(train), len(test), len(val), sep='\n')

## Using a Pretrained Model (Params < 10M)

In [None]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# Pretrained MobileNetV2 backbone: ImageNet weights, original classifier head removed.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Keep the backbone weights fixed so only the new head is trained.
base_model.trainable = False

# New classification head on top of the backbone:
# global average pooling -> two 4096-unit ReLU layers -> 3-way softmax.
# NOTE(review): the two 4096-unit layers add a large number of trainable
# weights (4096 x 4096 for the second one alone) — confirm the total reported
# by model.summary() matches the intended parameter budget.
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(4096, activation='relu'),
    Dense(4096, activation='relu'),
    Dense(3, activation='softmax'),
])

# Sparse categorical cross-entropy: labels are class indices, not one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)

# Layer-by-layer overview with parameter counts.
model.summary()


# Training Model

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once validation loss has not improved for 25 epochs, then restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=25,
    restore_best_weights=True,
)

# Train for at most 100 epochs, validating on the validation split after each epoch.
hist = model.fit(
    train,
    validation_data=val,
    epochs=100,
    callbacks=[early_stopping],
)

# Model Performance Over Time

In [None]:
import matplotlib.pyplot as plt

# Training curves: loss on the left, accuracy on the right, one point per epoch.
fig, axs = plt.subplots(1, 2, figsize=(12, 5))

axs[0].plot(hist.history['loss'], color='teal', label='loss')
axs[0].plot(hist.history['val_loss'], color='orange', label='val_loss')
axs[0].set_title('Loss', fontsize=20)
axs[0].set_xlabel('Epoch')
axs[0].legend(loc='upper left')


axs[1].plot(hist.history['accuracy'], color='red', label='accuracy')
# Show validation accuracy next to training accuracy so over-fitting is visible;
# the key is only present when validation data was passed to fit().
if 'val_accuracy' in hist.history:
    axs[1].plot(hist.history['val_accuracy'], color='blue', label='val_accuracy')
axs[1].set_title('Accuracy', fontsize=20)
axs[1].set_xlabel('Epoch')
axs[1].legend(loc='upper left')

plt.tight_layout()
plt.show()

In [None]:
# function for displaying predicted and original image
def display_images(images, true_labels, predicted_labels, num_images=10):
    """
    Show the first images in a two-row grid, each titled with its true and predicted label.

    Arguments:
    images -- indexable collection of images accepted by plt.imshow
    true_labels -- ground-truth label for each image
    predicted_labels -- predicted label for each image
    num_images -- maximum number of images to show; clamped to the number of
                  images/labels actually available

    Returns:
    None. Draws on a new matplotlib figure (nothing is drawn when there is
    nothing to show).
    """
    # Never index past the end of any of the three inputs.
    num_images = min(num_images, len(images), len(true_labels), len(predicted_labels))
    if num_images <= 0:
        return

    # Round the column count up so an odd number of images still fits in two rows.
    num_cols = (num_images + 1) // 2

    plt.figure(figsize=(20, 10))
    for i in range(num_images):
        plt.subplot(2, num_cols, i + 1)
        plt.imshow(images[i])
        plt.title(f"True: {true_labels[i]}, Pred: {predicted_labels[i]}")
        plt.axis("off")

# Validation Data Evaluation

In [None]:
# Collect the validation split into NumPy arrays. Images and labels are taken
# from the same pass so that they stay paired.
val_images = []
val_labels = []
for image_batch, label_batch in val:
    val_images.append(image_batch.numpy())
    val_labels.append(label_batch.numpy())

val_images = np.concatenate(val_images)
val_labels = np.concatenate(val_labels)

# Predict probabilities for each class
y_pred_probs = model.predict(val_images)

# Get predicted labels (index of the most probable class)
y_pred_val = np.argmax(y_pred_probs, axis=1)

# Calculate accuracy
accuracy = accuracy_score(val_labels, y_pred_val)

# Calculate F1 score (macro: unweighted mean over the classes)
f1 = f1_score(val_labels, y_pred_val, average='macro')

# These metrics are computed on the validation split, so label them as such.
print("Validation Accuracy:", accuracy)
print("F1 Score:", f1)
display_images(val_images, val_labels, y_pred_val, num_images=14)

# Test Data Evaluation

In [None]:
# Materialise the test split as NumPy arrays in a single pass, so every image
# stays paired with its own label.
test_batches = [(images.numpy(), labels.numpy()) for images, labels in test]
test_images = np.concatenate([images for images, _ in test_batches])
test_labels = np.concatenate([labels for _, labels in test_batches])

# Class probabilities for every test image, then the most probable class.
y_pred_probs = model.predict(test_images)
y_pred_test = y_pred_probs.argmax(axis=1)

# Accuracy and macro-averaged F1 on the test split.
accuracy = accuracy_score(test_labels, y_pred_test)
f1 = f1_score(test_labels, y_pred_test, average='macro')

print("Test Accuracy:", accuracy)
print("F1 Score:", f1)
display_images(test_images, test_labels, y_pred_test, num_images=14)

## Training from Scratch Using a Tuned ResNet50 Architecture with a Small Total Parameter Count for Efficiency

In [None]:
import keras
from keras.layers import Input, Conv2D, BatchNormalization, Activation, Add, Flatten, Dense, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Model
from keras.optimizers import SGD

# Residual block
def residual_block(x, filters, kernel_size=3, stride=1, conv_shortcut=True):
    """
    Two convolution + batch-norm layers with a skip connection added before the final ReLU.

    Arguments:
    x -- input tensor
    filters -- number of filters used by every convolution in the block
    kernel_size -- kernel size of the two main-path convolutions
    stride -- stride of the first main-path convolution and of the projection shortcut
    conv_shortcut -- if True, the input goes through a 1x1 convolution + batch norm
                     before being added; if False, the input is added unchanged

    Returns:
    output tensor of the block
    """
    # Skip path: either a 1x1 projection of the input or the input itself.
    if conv_shortcut:
        skip = Conv2D(filters, 1, strides=stride)(x)
        skip = BatchNormalization()(skip)
    else:
        skip = x

    # Main path: conv -> BN -> ReLU -> conv -> BN.
    main = Conv2D(filters, kernel_size, strides=stride, padding='same')(x)
    main = BatchNormalization()(main)
    main = Activation('relu')(main)
    main = Conv2D(filters, kernel_size, padding='same')(main)
    main = BatchNormalization()(main)

    # Merge both paths, then apply the closing non-linearity.
    merged = Add()([main, skip])
    return Activation('relu')(merged)

# Building the residual network: a stem followed by four stages of 3, 4, 6 and 3
# residual blocks with 32, 64, 128 and 256 filters
def build_resnet(input_shape=(224, 224, 3), num_classes=3):
    """
    Assemble the residual classifier used for training from scratch.

    Arguments:
    input_shape -- shape of one input image
    num_classes -- number of units in the final softmax layer

    Returns:
    an uncompiled Keras Model mapping images to class probabilities
    """
    # One entry per stage: (filters, number of blocks, stride of the first block).
    # Only the first block of a stage uses the convolutional shortcut.
    stage_specs = [
        (32, 3, 1),
        (64, 4, 2),
        (128, 6, 2),
        (256, 3, 2),
    ]

    inputs = Input(shape=input_shape)

    # Stem: 7x7 stride-2 convolution, batch norm, ReLU, 3x3 stride-2 max pooling.
    x = Conv2D(64, 7, strides=2, padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(3, strides=2, padding='same')(x)

    # Residual stages.
    for filters, num_blocks, first_stride in stage_specs:
        x = residual_block(x, filters, stride=first_stride, conv_shortcut=True)
        for _ in range(num_blocks - 1):
            x = residual_block(x, filters, conv_shortcut=False)

    # Head: global average pooling followed by the softmax classifier.
    x = GlobalAveragePooling2D()(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    return Model(inputs, outputs)

# Instantiate the from-scratch residual network (this rebinds `model`).
model = build_resnet(input_shape=(224, 224, 3), num_classes=3)

# SGD with momentum; sparse categorical cross-entropy because labels are class indices.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=SGD(learning_rate=0.01, momentum=0.9),
    metrics=['accuracy'],
)

# Layer-by-layer overview with parameter counts.
model.summary()

# Model Training

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Early stopping on validation loss. Patience is set to 80 so that training
# runs for (close to) the full number of epochs; the best weights are restored
# if it does stop early.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=80,
    restore_best_weights=True,
)

# Train for at most 100 epochs, validating on the validation split after each epoch.
hist = model.fit(
    train,
    validation_data=val,
    epochs=100,
    callbacks=[early_stopping],
)

# Model Performance Over Time

In [None]:
import matplotlib.pyplot as plt

# Training curves: loss on the left, accuracy on the right, one point per epoch.
fig, axs = plt.subplots(1, 2, figsize=(12, 5))

axs[0].plot(hist.history['loss'], color='teal', label='loss')
axs[0].plot(hist.history['val_loss'], color='orange', label='val_loss')
axs[0].set_title('Loss', fontsize=20)
axs[0].set_xlabel('Epoch')
axs[0].legend(loc='upper left')


axs[1].plot(hist.history['accuracy'], color='red', label='accuracy')
# Show validation accuracy next to training accuracy so over-fitting is visible;
# the key is only present when validation data was passed to fit().
if 'val_accuracy' in hist.history:
    axs[1].plot(hist.history['val_accuracy'], color='blue', label='val_accuracy')
axs[1].set_title('Accuracy', fontsize=20)
axs[1].set_xlabel('Epoch')
axs[1].legend(loc='upper left')

plt.tight_layout()
plt.show()

# Validation Data Evaluation

In [None]:
# Collect the validation split into NumPy arrays. Images and labels are taken
# from the same pass so that they stay paired.
val_images = []
val_labels = []
for image_batch, label_batch in val:
    val_images.append(image_batch.numpy())
    val_labels.append(label_batch.numpy())

val_images = np.concatenate(val_images)
val_labels = np.concatenate(val_labels)

# Predict probabilities for each class
y_pred_probs = model.predict(val_images)

# Get predicted labels (index of the most probable class)
y_pred_val = np.argmax(y_pred_probs, axis=1)

# Calculate accuracy
accuracy = accuracy_score(val_labels, y_pred_val)

# Calculate F1 score (macro: unweighted mean over the classes)
f1 = f1_score(val_labels, y_pred_val, average='macro')

# These metrics are computed on the validation split, so label them as such.
print("Validation Accuracy:", accuracy)
print("F1 Score:", f1)
display_images(val_images, val_labels, y_pred_val, num_images=14)

# Test Data Evaluation

In [None]:
# Materialise the test split as NumPy arrays in a single pass, so every image
# stays paired with its own label.
test_batches = [(images.numpy(), labels.numpy()) for images, labels in test]
test_images = np.concatenate([images for images, _ in test_batches])
test_labels = np.concatenate([labels for _, labels in test_batches])

# Class probabilities for every test image, then the most probable class.
y_pred_probs = model.predict(test_images)
y_pred_test = y_pred_probs.argmax(axis=1)

# Accuracy and macro-averaged F1 on the test split.
accuracy = accuracy_score(test_labels, y_pred_test)
f1 = f1_score(test_labels, y_pred_test, average='macro')

print("Test Accuracy:", accuracy)
print("F1 Score:", f1)
display_images(test_images, test_labels, y_pred_test, num_images=14)