# HW6

## Problem 1

Use the Oxford Flowers 102 category dataset from HW 1 and the CSV file
given here for this problem. Use a data generator to read data from folders
and form tensors to feed into a network. You can use image augmentation as
needed. Use the CSV for this purpose. Create a training and validation set.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from skimage import *
from sklearn.model_selection import train_test_split

In [3]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(tf.__version__)

Num GPUs Available:  1
2.10.0


In [4]:
INPUT_SIZE = 256
BATCH_SIZE = 64

SEED = 12345
np.random.seed(SEED)


df = pd.read_csv(r'C:\Users\Ethan\Desktop\CS450\flowers.csv')
labels_new = df.sort_values('labels')
myclasses = list(labels_new.labels.unique())
NUMCLASSES = len(myclasses)
path = r'C:\Users\Ethan\Desktop\CS450\oxford_flowers_102\jpg'

In [5]:
from skimage import exposure

def preprocess(img):
    # Contrast stretching
    p2, p98 = np.percentile(img, (2, 98))
    img_rescale = exposure.rescale_intensity(img, in_range=(p2, p98))
    
    return img_rescale


In [6]:
train_datagen = ImageDataGenerator(
                                featurewise_center=False,
                                samplewise_center=True,
                                featurewise_std_normalization=False,
                                samplewise_std_normalization=True,
                                zca_epsilon=1e-06,
                                rotation_range=5,
                                width_shift_range=0.05,
                                height_shift_range=0.05,
                                brightness_range=None,
                                shear_range=0.5,
                                zoom_range=0.05,
                                channel_shift_range=0.0,
                                fill_mode="nearest",
                                cval=0.0,
                                horizontal_flip=False,
                                vertical_flip=False,
                                rescale=1./255,
                                preprocessing_function=None,#preprocess,
                                data_format=None,
                                validation_split=0.2,
                                dtype=None,
                            )


In [7]:
# Make a dtaframe with all the file paths and labels
allfilelist = []
alllabels = []
alllabels_word = []
for i in range(NUMCLASSES):
    filepath = path
    filelist = os.listdir(filepath)
    for f in filelist:
        if f.lower() == 'thumbs.db':
            continue
        
        fullpath = filepath + '/' + f
        allfilelist.append(fullpath)
        alllabels.append(i)
        alllabels_word.append(myclasses[i])
        
d = {'filename':allfilelist,'labelnum':alllabels,'label':alllabels_word}

df = pd.DataFrame(d)
df["label"] = df['label'].astype(str)

In [8]:
df

Unnamed: 0,filename,labelnum,label
0,C:\Users\Ethan\Desktop\CS450\oxford_flowers_10...,0,1
1,C:\Users\Ethan\Desktop\CS450\oxford_flowers_10...,0,1
2,C:\Users\Ethan\Desktop\CS450\oxford_flowers_10...,0,1
3,C:\Users\Ethan\Desktop\CS450\oxford_flowers_10...,0,1
4,C:\Users\Ethan\Desktop\CS450\oxford_flowers_10...,0,1
...,...,...,...
835273,C:\Users\Ethan\Desktop\CS450\oxford_flowers_10...,101,102
835274,C:\Users\Ethan\Desktop\CS450\oxford_flowers_10...,101,102
835275,C:\Users\Ethan\Desktop\CS450\oxford_flowers_10...,101,102
835276,C:\Users\Ethan\Desktop\CS450\oxford_flowers_10...,101,102


In [9]:
training_set = train_datagen.flow_from_dataframe(
                                            dataframe=df,
                                            directory='',
                                            x_col="filename",
                                            y_col="label",
                                            #weight_col=None,
                                            target_size=(INPUT_SIZE, INPUT_SIZE),
                                            color_mode="rgb",
                                            classes=None,
                                            class_mode="categorical",
                                            batch_size=BATCH_SIZE,
                                            shuffle=True,
                                            seed=None,
                                            save_to_dir=None,
                                            save_prefix="",
                                            save_format="png",
                                            subset='training',
                                            interpolation="nearest",
                                            #validate_filenames=True
                                        )

validation_set = train_datagen.flow_from_dataframe( dataframe=df,
                                            directory='',
                                            x_col="filename",
                                            y_col="label",
                                            #weight_col=None,
                                            target_size=(INPUT_SIZE, INPUT_SIZE),
                                            color_mode="rgb",
                                            classes=None,
                                            class_mode="categorical",
                                            batch_size=BATCH_SIZE,
                                            shuffle=True,
                                            seed=None,
                                            save_to_dir=None,
                                            save_prefix="",
                                            save_format="png",
                                            subset='validation',
                                            interpolation="nearest",
                                            #validate_filenames=True
                                            )

Found 668223 validated image filenames belonging to 102 classes.
Found 167055 validated image filenames belonging to 102 classes.


# Problem 2

Design a small sequential CNN to classify the flowers into categories. I
intentionally don’t specify a design – use something that can be trained on
your computer

In [10]:

#from sklearn.model_selection import train_test_split
#os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

model = Sequential()

model.add(InputLayer(input_shape=[INPUT_SIZE,INPUT_SIZE,3])) #keras will internally add batch dimension

model.add(Conv2D(filters=32,kernel_size=3,strides=1,padding='same', activation='relu'))
model.add(Conv2D(filters=32,kernel_size=3,strides=1,padding='same', activation='relu'))
model.add(MaxPool2D(pool_size=2,padding='same'))
model.add(Dropout(0.4))

model.add(Conv2D(filters=64,kernel_size=3,strides=1,padding='same', activation='relu'))
model.add(Conv2D(filters=64,kernel_size=3,strides=1,padding='same', activation='relu'))
model.add(MaxPool2D(pool_size=2,padding='same'))
model.add(Dropout(0.4))

model.add(Conv2D(filters=128,kernel_size=3,strides=1,padding='same', activation='relu'))
model.add(Conv2D(filters=128,kernel_size=3,strides=1,padding='same', activation='relu'))
model.add(MaxPool2D(pool_size=2,padding='same'))
model.add(Dropout(0.4))


model.add(Flatten())


model.add(Dense(256,activation='relu'))
model.add(Dropout(0.4))

model.add(Dense(128,activation='relu'))
model.add(Dropout(0.4))

model.add(Dense(NUMCLASSES,activation='softmax'))


model.compile(optimizer=Adam(lr=0.00001), loss='categorical_crossentropy',metrics=['accuracy'])
model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 256, 256, 32)      896       
                                                                 
 conv2d_1 (Conv2D)           (None, 256, 256, 32)      9248      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 128, 128, 32)     0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 128, 128, 32)      0         
                                                                 
 conv2d_2 (Conv2D)           (None, 128, 128, 64)      18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 128, 128, 64)      36928     
                                                        

  super().__init__(name, **kwargs)


In [11]:
#Training

for epoch in range(0,50):
    print("Epoch",epoch)
    if epoch != 0:
        # Load Model Weights
        model.load_weights('model-normalized-aug.h5')    
    history = model.fit(training_set,
    steps_per_epoch=len(training_set),
                   epochs=1,
                   validation_data=validation_set,
                   validation_steps = len(validation_set))

    model.save_weights("model-normalized-aug.h5")
    print("Saved model to disk after",epoch+1,"epochs.")

Epoch 0
Saved model to disk after 1 epochs.
Epoch 1
Saved model to disk after 2 epochs.
Epoch 2


KeyboardInterrupt



In [None]:
history.history.keys()

In [None]:
fig = plt.figure(figsize=(8,4))
plt.plot(history.history['loss'],color='red')
plt.plot(history.history['accuracy'],color='green')
plt.plot(history.history['val_loss'],color='magenta')
plt.plot(history.history['val_accuracy'],color='blue')
plt.show()

In [None]:
#Testing

fig = plt.figure(figsize=(12, 18))
for i in range(testdigits.shape[0]):
    y = fig.add_subplot(9, 6, i+1)
    img = digit(testdigits[i,:])
    img = img.reshape(1,IMG_SIZE,IMG_SIZE,1)
    model_out = model.predict(img)
    print(np.uint16(model_out*100)/100)
    str_label= str(np.argmax(model_out))
    
    y.imshow(digit(testdigits[i,:]),cmap='gray')
    plt.title(str_label)
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)
    if i==53:
        break;
plt.show()


# Extra Credit

You can set aside 20% of the data at the beginning for
testing. Now run your classifier on this 20% and plot the accuracy on a
confusion matrix. You may take help from other programs done in class or
from the web. 