In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
PATH = '/content/drive/MyDrive/1:1_Hanish_Acharla/Dataset/Final_Dataset/Test'

In [3]:
def get_optimizer(optimizer_name, learning_rate):
    # Import keras optimizers
    from tensorflow.keras.optimizers import Adam, Adadelta, Adagrad, Adamax, Ftrl, Nadam, RMSprop, SGD
    print('Selected Optimizer', optimizer_name)
    switcher = {
        'Adadelta': Adadelta(lr=learning_rate),
        'Adagrad': Adagrad(lr=learning_rate),
        'Adam': Adam(lr=learning_rate),
        'Adamax': Adamax(lr=learning_rate),
        'FTRL': Ftrl(lr=learning_rate),
        'NAdam': Nadam(lr=learning_rate),
        'RMSprop': RMSprop(lr=learning_rate),
        'Gradient Descent': SGD(lr=learning_rate)
    }
    # If optimizer_name is empty, Adam will be return as default optimizer
    return switcher.get(optimizer_name, Adam(lr=learning_rate))

In [4]:
def convert_tf_dataset(PATH, model):
    # This function passes all images provided in PATH
    # and passes them through the model.
    # The result is a featurized image along with labels
    data = []
    IMG_SIZE = (224, 224)
    file_list = []
    # Get the list of subfolders
    sub_dirs = next(os.walk(PATH))[1]
    print(sub_dirs)
    num_images = 0
    # Create a list of lists
    # Number of lists is same as the number of subfolders
    # Number of items in the sub-list is the number of
    # images in each sub-folder
    for category in sub_dirs:
        files = next(os.walk(PATH + '/' + category), (None, None, []))[2]
        filenames = [PATH + '/' + category + '/' + file for file in files]
        num_images += len(filenames)
        file_list.append(filenames)
        labels = []
    # Every image is pre-processed and passed thought the model
    # Label is created for every image
    for category in file_list:
        for img_path in category:
            img = tf.keras.preprocessing.image.load_img(img_path, target_size=IMG_SIZE)
            img_array = tf.keras.preprocessing.image.img_to_array(img)
            img_batch = np.expand_dims(img_array, axis=0)
            img_preprocessed = preprocess_input(img_batch)
            data.append(model.predict(img_preprocessed))
            labels.append(img_path.split('/')[-2])

    # Make sure dimensions are (num_samples, 1280)
    data = np.squeeze(np.array(data))
    labels = np.reshape(labels, (-1,1))
    return data, labels

In [5]:
# Import packages needed to create a image classification model
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf
from keras.applications.resnet import preprocess_input
#from keras.preprocessing.image import ImageDataGenerator # No change required here as it is not being used
from keras.layers import Dense,GlobalAveragePooling2D
from keras.models import Model
from keras.layers import Dense,GlobalAveragePooling2D
from keras.callbacks import EarlyStopping
from tensorflow import keras
IMG_SIZE = (224, 224)
# Download the model, valid alpha values [0.25,0.35,0.5,0.75,1]
base_model = tf.keras.applications.ConvNeXtXLarge(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
# Add average pooling to the base
x = base_model.output
x = GlobalAveragePooling2D()(x)
model_frozen = Model(inputs=base_model.input,outputs=x)
# Get the transformed features from the dataset
# TODO: This can be moved to the FE stage of the pipeline
# label_map is not used anywhere right now. it has information
# about which label is mapped to which number
data, labels = convert_tf_dataset(PATH, model_frozen)
# Shuffle the dataset for training
shuffler = np.random.permutation(len(data))
data_shuffled = data[shuffler]
labels_shuffled = labels[shuffler]
print(data_shuffled)
num_features = data_shuffled.shape[1]

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/convnext/convnext_xlarge_notop.h5
[1m1393257616/1393257616[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 0us/step
['healthy', 'complex', 'scab', 'rust', 'frog_eye_leaf_spot', 'powdery_mildew']
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/s

In [6]:
import pandas as pd
feature_names = []
for a in range(0,num_features):
  feature_names.append('feature_' + str(a))
feature_names.append('label')
df = pd.DataFrame(data=np.hstack((data_shuffled,labels_shuffled)), columns=feature_names)
df.head()

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_2039,feature_2040,feature_2041,feature_2042,feature_2043,feature_2044,feature_2045,feature_2046,feature_2047,label
0,0.36567572,0.0188274,-0.22239466,0.22568789,-0.3203387,0.4337461,0.044434436,-0.18600115,0.14610727,0.023728555,...,-0.37643838,0.30204073,0.33080378,-0.04659633,0.080659114,-0.2553101,1.3352122,1.125386,-0.13948657,scab
1,0.77313495,0.16711308,-0.35001457,-0.104920395,0.53499115,0.3847713,-0.16195647,-0.5486198,0.29221705,0.7428902,...,-0.1201446,-0.028426044,0.92144024,0.21721138,0.014293196,-0.32931262,0.6903188,0.9138281,0.10951196,complex
2,0.43943754,0.011704926,-0.3042966,0.6643401,0.13823329,-0.38540128,-0.25934204,-0.53941774,0.004914493,0.07197415,...,-0.2762829,0.09090581,-0.012168057,0.16745389,0.17900698,-0.27714252,1.5354592,0.5896861,-0.87383485,healthy
3,0.17121272,0.092225425,-0.00015780695,0.78443676,0.08274501,0.10657379,-0.04578708,-0.38701504,-0.04826533,-0.19952224,...,-0.10480989,0.059304953,0.26095712,-0.12523752,-0.16559798,-0.25364777,0.5244096,1.395751,-0.3649714,scab
4,0.59487003,-0.02780704,-0.14615948,0.45196867,0.27233103,-0.4677717,0.14361311,-0.27451771,0.08611167,0.10360235,...,-0.40874755,-0.13898471,0.86378646,0.20142505,-0.05383549,-0.19110854,0.26554182,-0.7336904,-0.022929234,healthy


In [7]:
df.to_csv('/content/drive/MyDrive/1:1_Hanish_Acharla/Dataset/Final_Dataset/Featurized DataSet/test_data.csv', index=False)
