In [None]:
# Mount Google Drive at /content/drive. The dataset folder read below and
# the CSV written at the end of the notebook both live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Root folder of the training images. convert_tf_dataset() treats every
# immediate sub-folder of this directory as one class and uses the
# sub-folder name as the label of the images inside it.
PATH = '/content/drive/MyDrive/1:1_Hanish_Acharla/Dataset/Final_Dataset/Train'

In [None]:
def get_optimizer(optimizer_name, learning_rate):
    """Build the Keras optimizer registered under ``optimizer_name``.

    Args:
        optimizer_name: One of 'Adadelta', 'Adagrad', 'Adam', 'Adamax',
            'FTRL', 'NAdam', 'RMSprop' or 'Gradient Descent'. Any other
            value (including an empty string or None) selects Adam.
        learning_rate: Learning rate handed to the optimizer constructor.

    Returns:
        A newly constructed ``tensorflow.keras.optimizers`` instance.
    """
    # Import keras optimizers
    from tensorflow.keras.optimizers import Adam, Adadelta, Adagrad, Adamax, Ftrl, Nadam, RMSprop, SGD
    print('Selected Optimizer', optimizer_name)
    # Map names to classes (not instances) so that only the optimizer that
    # is actually requested gets constructed.
    optimizer_classes = {
        'Adadelta': Adadelta,
        'Adagrad': Adagrad,
        'Adam': Adam,
        'Adamax': Adamax,
        'FTRL': Ftrl,
        'NAdam': Nadam,
        'RMSprop': RMSprop,
        'Gradient Descent': SGD,
    }
    # If optimizer_name is empty or unknown, Adam is returned as the default optimizer
    optimizer_cls = optimizer_classes.get(optimizer_name, Adam)
    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated in TF2 and rejected outright by Keras 3.
    return optimizer_cls(learning_rate=learning_rate)

In [None]:
def convert_tf_dataset(PATH, model):
    """Featurize every image found under ``PATH`` with ``model``.

    ``PATH`` is expected to contain one sub-folder per class. Every file in
    a sub-folder is loaded as an image, resized to 224x224, passed through
    the notebook-level ``preprocess_input`` function and then through
    ``model.predict`` one image at a time. The sub-folder name becomes the
    label of the image. Sub-folders and files are visited in sorted order
    so repeated runs process the images in the same sequence.

    Args:
        PATH: Root directory (string) holding one sub-folder per class.
        model: Keras-style model whose ``predict`` turns a batch of one
            preprocessed image into its feature vector.

    Returns:
        Tuple ``(data, labels)``. ``data`` is a 2-D array with one row per
        image (``(num_images, num_features)``), also when there is only a
        single image. ``labels`` is a ``(num_images, 1)`` array of
        sub-folder names. When no image is found, ``data`` has shape
        ``(0, 0)`` and ``labels`` has shape ``(0, 1)``.

    Raises:
        FileNotFoundError: If ``PATH`` is not an existing directory.
    """
    IMG_SIZE = (224, 224)
    # Fail with a clear message instead of the bare StopIteration that
    # next(os.walk(...)) raises for a missing directory.
    if not os.path.isdir(PATH):
        raise FileNotFoundError('Dataset directory not found: ' + str(PATH))
    # Get the list of subfolders
    sub_dirs = sorted(next(os.walk(PATH))[1])
    print(sub_dirs)
    # Pair every image path with the name of the sub-folder it lives in
    samples = []
    for category in sub_dirs:
        files = next(os.walk(PATH + '/' + category), (None, None, []))[2]
        samples.extend((PATH + '/' + category + '/' + file, category) for file in sorted(files))

    # Every image is pre-processed and passed through the model
    # Label is created for every image
    data = []
    labels = []
    for img_path, category in samples:
        img = tf.keras.preprocessing.image.load_img(img_path, target_size=IMG_SIZE)
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        img_batch = np.expand_dims(img_array, axis=0)
        img_preprocessed = preprocess_input(img_batch)
        # verbose=0: otherwise predict prints one progress bar per image
        data.append(model.predict(img_preprocessed, verbose=0))
        labels.append(category)

    labels = np.reshape(labels, (-1, 1))
    if not data:
        # Nothing to featurize; keep the 2-D contract for the feature matrix
        return np.empty((0, 0), dtype=np.float32), labels
    # Make sure dimensions are (num_samples, num_features). An explicit
    # reshape (rather than np.squeeze) keeps the sample axis even when
    # there is only one image.
    data = np.asarray(data).reshape(len(data), -1)
    return data, labels

In [None]:
# Import packages needed to create a image classification model
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
from keras.applications.resnet import preprocess_input
#from keras.preprocessing.image import ImageDataGenerator # No change required here as it is not being used
from keras.layers import Dense,GlobalAveragePooling2D
from keras.models import Model
from keras.layers import Dense,GlobalAveragePooling2D
from keras.callbacks import EarlyStopping
from tensorflow import keras
IMG_SIZE = (224, 224)
# Fixed seed so the shuffle below (and therefore the saved CSV) is reproducible
SEED = 42
# ConvNeXt normalises its inputs inside the model and expects raw RGB pixels
# in the [0, 255] range. The ResNet preprocess_input imported above converts
# to BGR and subtracts the ImageNet means, which feeds the backbone the wrong
# input distribution. Rebind the name that convert_tf_dataset() looks up to
# the ConvNeXt (pass-through) preprocessing.
# NOTE: apply the same preprocessing wherever validation/test images are
# featurized, otherwise their features will not match these ones.
preprocess_input = tf.keras.applications.convnext.preprocess_input
# Download the ImageNet-pretrained ConvNeXt-XLarge backbone without its classifier head
base_model = tf.keras.applications.ConvNeXtXLarge(input_shape=IMG_SIZE + (3,), include_top=False, weights='imagenet')
# Add average pooling to the base so every image maps to one feature vector
x = base_model.output
x = GlobalAveragePooling2D()(x)
model_frozen = Model(inputs=base_model.input, outputs=x)
# Get the transformed features from the dataset
# TODO: This can be moved to the FE stage of the pipeline
# labels holds the class sub-folder name of every image
data, labels = convert_tf_dataset(PATH, model_frozen)
# Shuffle the dataset for training (same permutation for features and labels)
shuffler = np.random.default_rng(SEED).permutation(len(data))
data_shuffled = data[shuffler]
labels_shuffled = labels[shuffler]
# Show the shape instead of dumping the whole feature matrix
print(data_shuffled.shape)
num_features = data_shuffled.shape[1]

In [None]:
import pandas as pd
# Column names for the feature matrix: feature_0 ... feature_{num_features - 1}
feature_names = ['feature_' + str(a) for a in range(num_features)]
# Build the frame from the numeric features and attach the labels as a
# separate column. Stacking floats and string labels into one array with
# np.hstack would coerce every feature value to a string.
df = pd.DataFrame(data=data_shuffled, columns=feature_names)
df['label'] = labels_shuffled.ravel()
# Keep feature_names listing every column of df, label included
feature_names.append('label')
df.head()

In [None]:
# Persist the featurized training set to Drive (one row per image, label in
# the last column; the DataFrame index is not written).
OUTPUT_CSV = '/content/drive/MyDrive/1:1_Hanish_Acharla/Dataset/Final_Dataset/Featurized DataSet/train_data.csv'
# Create the target folder if needed so the write cannot fail after the
# expensive featurization has already run.
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
df.to_csv(OUTPUT_CSV, index=False)
