# Classification Solution 2


In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Input, Resizing
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import seaborn as sns
import time
from sklearn.metrics import accuracy_score
from tensorflow import keras

# Silence TensorFlow's native (C++) logging; level '3' filters INFO, WARNING
# and ERROR messages.
# NOTE(review): this runs after `import tensorflow` above. The variable is
# normally read when the native runtime first logs, so it may have to be set
# before that import to take full effect -- confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Let TensorFlow grow GPU memory on demand instead of reserving it all up
# front. `tf.config.list_physical_devices` is the stable name of the
# deprecated `tf.config.experimental.list_physical_devices` alias.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Raised when memory growth is changed after the GPUs were initialised.
        print(e)

# Root folders of the three dataset splits.
train_path = 'archive/train'
valid_path = 'archive/valid'
test_path = 'archive/test'

# Size images are loaded at, and the size the model resizes them to.
custom_image_size = (224, 224)
target_image_size = (224, 224)
batch_size = 128

# Only rescale pixel values by 1/255; no augmentation is applied.
datagen = ImageDataGenerator(rescale=1./255)

# Every split is read with the same loading options.
_flow_options = dict(
    target_size=custom_image_size,
    batch_size=batch_size,
    class_mode='categorical',
)
train_generator = datagen.flow_from_directory(train_path, **_flow_options)
valid_generator = datagen.flow_from_directory(valid_path, **_flow_options)
test_generator = datagen.flow_from_directory(test_path, **_flow_options)

# Size of the softmax output layer, taken from the training split.
num_classes = train_generator.num_classes

# Vision Transformer feature extractor from TensorFlow Hub, kept frozen.
hub_url = "https://tfhub.dev/sayakpaul/vit_b16_fe/1"
vit_layer = hub.KerasLayer(hub_url, trainable=False)

# Classification head: resize -> ViT features -> dropout -> softmax.
inputs = Input(shape=(*custom_image_size, 3))
x = Resizing(*target_image_size)(inputs)  # test code
x = vit_layer(x)
x = Dropout(0.2)(x)
outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs, outputs)

# Compile, and stop early once validation loss has not improved for 3 epochs,
# restoring the best weights seen.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Train for up to 20 epochs and report the wall-clock duration of the fit.
start_time = time.time()
history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=20,
    callbacks=[early_stopping],
    verbose=1,
)
end_time = time.time()
training_time = end_time - start_time
print(f"Training Time: {training_time} seconds")

# Build the evaluation iterator BEFORE starting the timer so that scanning the
# directory tree is not counted as inference time. shuffle=False keeps the
# prediction order aligned with `.classes`.
# NOTE: the name `valid_generator2` is kept for compatibility, but it reads
# from `test_path`, i.e. these are test-split metrics.
valid_generator2 = datagen.flow_from_directory(test_path, target_size=custom_image_size, batch_size=batch_size, class_mode='categorical', shuffle=False)
y_true = valid_generator2.classes

# Measure inference time (batch loading + forward passes over the whole split)
start_time = time.time()
# Generate predictions
y_pred = np.argmax(model.predict(valid_generator2), axis=1)
end_time = time.time()
inference_time = end_time - start_time
print(f"Total Inference Time: {inference_time} seconds")

#calc F1 Score
f1 = f1_score(y_true, y_pred, average='weighted')
print(f"F1 Score: {f1}")

#calc accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f"Accuracy: {accuracy}")

# Generate classification report
print(classification_report(y_true, y_pred, target_names=list(valid_generator2.class_indices.keys())))

# Plot training & validation accuracy values
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Vision Transformer accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# The second curve is `val_accuracy` (validation split), not the test split.
plt.legend(['Train', 'Validation'], loc='upper right')

# Plot training & validation loss values
plt.subplot(1, 2, 2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Vision Transformer Training and Validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()

# Save the trained model to 'model.keras' (path is relative to the working directory).
model.save('model.keras') 

## Visualise images for Data Insight. Uncomment to run independently.

In [None]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def load_data(directory, image_size):
    """Load every image under ``directory`` into memory, labelled by folder name.

    Each immediate sub-directory of ``directory`` is treated as one class and
    each file inside it as one image of that class.

    Args:
        directory: Root folder whose sub-directories hold the images.
        image_size: Size passed to ``load_img`` as ``target_size``.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays: the image arrays and, for
        each image, the name of the sub-directory it came from.
    """
    data = []
    labels = []
    # Sorted listings make the sample order reproducible; os.listdir gives an
    # arbitrary order.
    for subdir in sorted(os.listdir(directory)):
        subdir_path = os.path.join(directory, subdir)
        if not os.path.isdir(subdir_path):
            continue
        for filename in sorted(os.listdir(subdir_path)):
            file_path = os.path.join(subdir_path, filename)
            if not os.path.isfile(file_path):
                # Nested folders cannot be opened as images.
                continue
            image = load_img(file_path, target_size=image_size)
            data.append(img_to_array(image))
            labels.append(subdir)
    return np.array(data), np.array(labels)


# Root folders of the three dataset splits.
train_path = 'archive/train'
valid_path = 'archive/valid'
test_path = 'archive/test'


# Load every split fully into memory at a common size.
image_size = (224, 224)  # Example size, change as needed
X_train, y_train = load_data(train_path, image_size=image_size)
X_valid, y_valid = load_data(valid_path, image_size=image_size)
X_test, y_test = load_data(test_path, image_size=image_size)


# Number of directory entries per class folder of the training split.
# (This statement was indented by one space, which is an IndentationError at
# module level.)
class_image_count = {}

for subdir in os.listdir(train_path):
    subdir_path = os.path.join(train_path, subdir)
    if os.path.isdir(subdir_path):
        image_count = len(os.listdir(subdir_path))
        class_image_count[subdir] = image_count

# Print the number of images in each class
print("Number of images in each class:")
for class_name, count in class_image_count.items():
    print(f'{class_name}: {count} images')


# Smallest and largest classes by image count.
min_class = min(class_image_count, key=class_image_count.get)
max_class = max(class_image_count, key=class_image_count.get)

print(f'\nClass with the lowest number of images: {min_class} ({class_image_count[min_class]} images)')
print(f'Class with the highest number of images: {max_class} ({class_image_count[max_class]} images)')

# Plot class distribution
plt.figure(figsize=(20, 10))
plt.bar(class_image_count.keys(), class_image_count.values())
plt.xticks(rotation=90)
plt.xlabel('Species')
plt.ylabel('Number of Images')
plt.title('Class Distribution of Butterfly and Moths Dataset')
plt.show()

# Parameters of the sample-image grid: one row per class, eight columns.
classes_to_show = ['AN 88', 'ATALA', 'BROOKES BIRDWING']  # Replace with class name here
images_per_class = 8
rows, cols = len(classes_to_show), 8
# Filter and sort images
def filter_and_sort_images(X, y, classes, images_per_class):
    """Randomly pick up to ``images_per_class`` images for each requested class.

    Results are grouped in the order of ``classes``. A class with fewer than
    ``images_per_class`` images contributes all the images it has, and a class
    that does not occur in ``y`` contributes nothing (previously both cases
    raised ``ValueError`` from ``np.random.choice``).

    Args:
        X: Indexable collection of images, aligned with ``y``.
        y: Label of each image in ``X``.
        classes: Class names to select, in output order.
        images_per_class: Maximum number of images to draw per class.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays holding the selection.
    """
    filtered_images = []
    filtered_labels = []
    for class_name in classes:
        indices = [i for i, label in enumerate(y) if label == class_name]
        # Sampling without replacement cannot draw more than is available.
        sample_size = min(images_per_class, len(indices))
        if sample_size == 0:
            continue
        selected_indices = np.random.choice(indices, sample_size, replace=False)
        for idx in selected_indices:
            filtered_images.append(X[idx])
            filtered_labels.append(y[idx])
    return np.array(filtered_images), np.array(filtered_labels)

# Visualize filtered images
def visualize_images(filtered_images, filtered_labels, rows, cols):
    """Draw the images on a ``rows`` x ``cols`` grid, each titled with its label.

    Args:
        filtered_images: Images to draw; each is cast to ``uint8`` for display.
        filtered_labels: Title for each image, aligned with ``filtered_images``.
        rows: Number of grid rows.
        cols: Number of grid columns.
    """
    plt.figure(figsize=(20, 10))
    # Subplot positions are 1-based.
    for slot, image in enumerate(filtered_images, start=1):
        plt.subplot(rows, cols, slot)
        plt.imshow(image.astype("uint8"))
        plt.title(filtered_labels[slot - 1])
        plt.axis("off")
    plt.show()

# Draw a random sample of training images for the chosen classes and show them.
filtered_images, filtered_labels = filter_and_sort_images(
    X_train,
    y_train,
    classes=classes_to_show,
    images_per_class=images_per_class,
)
visualize_images(filtered_images, filtered_labels, rows=rows, cols=cols)

