In [17]:
import os
import numpy as np
import keras.applications as ka
import keras
import tensorflow as tf
import cv2
from keras.src.layers import Dense

In [18]:
def load_model():
    '''
    Load in a model using the tf.keras.applications model and return it.
    Insert a more detailed description here TODO
    '''
    model = tf.keras.applications.MobileNetV2()
    num_classes = 5
    model.layers[-1] = Dense(num_classes, activation='softmax')
    return model

In [33]:
def load_data(path):
    '''
    Load in the dataset from its home path. Path should be a string of the path
    to the home directory the dataset is found in. Should return a numpy array
    with paired images and class labels.
    '''
    classes = os.listdir(path)
    images = []
    labels = []
    data = []
    image_size=(224, 224)
    for i, class_name in enumerate(classes):
        
        class_path = os.path.join(path, class_name)
        class_images = os.listdir(class_path)
        for image_name in class_images:
            image_path = os.path.join(class_path, image_name)
            image = cv2.imread(image_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, image_size)
            image = np.array(image)
            images.append(image)
            labels.append(class_name)
            data.append((image, class_name))
    images = np.array(images)
    labels = np.array(labels)
    print(data)
    return data

In [20]:
def split_data(X, Y, train_fraction, randomize=False, eval_set=True):
    """
    Split the data into training and testing sets. If eval_set is True, also create
    an evaluation dataset. There should be two outputs if eval_set there should
    be three outputs (train, test, eval), otherwise two outputs (train, test).
    
    Args:
        X (numpy.ndarray): Input features.
        Y (numpy.ndarray): Corresponding labels.
        train_fraction (float): Fraction of data to use for training.
        randomize (bool, optional): Whether to randomly shuffle the data. Defaults to False.
        eval_set (bool, optional): Whether to create an evaluation dataset. Defaults to True.
    
    Returns:
        tuple: If eval_set is True, returns (train_X, train_Y, test_X, test_Y, eval_X, eval_Y).
               If eval_set is False, returns (train_X, train_Y, test_X, test_Y).
    """
    num_samples = len(X)
    train_samples = int(num_samples * train_fraction)
    test_samples = num_samples - train_samples

    if randomize:
        indices = np.random.permutation(num_samples)
        X = X[indices]
        Y = Y[indices]

    train_X = X[:train_samples]
    train_Y = Y[:train_samples]
    test_X = X[train_samples:train_samples + test_samples]
    test_Y = Y[train_samples:train_samples + test_samples]

    if eval_set:
        eval_X = X[train_samples + test_samples:]
        eval_Y = Y[train_samples + test_samples:]
        return train_X, train_Y, test_X, test_Y, eval_X, eval_Y
    else:
        return train_X, train_Y, test_X, test_Y

In [34]:
data = load_data('small_flower_dataset')
split_data(x,y, 0.8)

[(array([[[82, 53, 19],
        [82, 53, 19],
        [84, 55, 21],
        ...,
        [60, 36,  0],
        [60, 36,  0],
        [60, 36,  0]],

       [[82, 53, 19],
        [83, 53, 20],
        [84, 55, 21],
        ...,
        [61, 39,  0],
        [61, 39,  0],
        [61, 39,  0]],

       [[82, 53, 19],
        [83, 54, 20],
        [84, 55, 21],
        ...,
        [60, 38,  0],
        [60, 38,  0],
        [60, 38,  0]],

       ...,

       [[31, 15,  0],
        [32, 16,  1],
        [34, 17,  1],
        ...,
        [ 1,  1,  1],
        [ 1,  1,  1],
        [ 1,  1,  1]],

       [[32, 16,  1],
        [32, 16,  1],
        [34, 18,  2],
        ...,
        [ 1,  1,  1],
        [ 1,  1,  1],
        [ 1,  1,  1]],

       [[32, 16,  1],
        [32, 17,  1],
        [34, 18,  2],
        ...,
        [ 1,  1,  1],
        [ 1,  1,  1],
        [ 0,  0,  0]]], dtype=uint8), 'daisy'), (array([[[248, 245, 246],
        [253, 251, 252],
        [250, 250, 250],
   

(array([[[[ 82,  53,  19],
          [ 82,  53,  19],
          [ 84,  55,  21],
          ...,
          [ 60,  36,   0],
          [ 60,  36,   0],
          [ 60,  36,   0]],
 
         [[ 82,  53,  19],
          [ 83,  53,  20],
          [ 84,  55,  21],
          ...,
          [ 61,  39,   0],
          [ 61,  39,   0],
          [ 61,  39,   0]],
 
         [[ 82,  53,  19],
          [ 83,  54,  20],
          [ 84,  55,  21],
          ...,
          [ 60,  38,   0],
          [ 60,  38,   0],
          [ 60,  38,   0]],
 
         ...,
 
         [[ 31,  15,   0],
          [ 32,  16,   1],
          [ 34,  17,   1],
          ...,
          [  1,   1,   1],
          [  1,   1,   1],
          [  1,   1,   1]],
 
         [[ 32,  16,   1],
          [ 32,  16,   1],
          [ 34,  18,   2],
          ...,
          [  1,   1,   1],
          [  1,   1,   1],
          [  1,   1,   1]],
 
         [[ 32,  16,   1],
          [ 32,  17,   1],
          [ 34,  18,   2],
   