# Flowers Classification Challenge with Computer Vision

### Importing the necessary packages

In [28]:
import scipy.io
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
import numpy as np
import PIL as image
import os
import shutil

### Loading Data

In [2]:
# Load the three MATLAB files in one pass:
#   - dist_mat_data: precomputed chi-squared distance matrices (loaded here, but
#     not referenced by any other cell visible in this part of the notebook)
#   - label_data:    the class label of every image
#   - split_data:    the image ids that make up the train / valid / test splits
dist_mat_data, label_data, split_data = (
    scipy.io.loadmat(mat_file)
    for mat_file in ('distancematrices102.mat', 'imagelabels.mat', 'setid.mat')
)

In [3]:
# Display the loaded split dictionary (the 'trnid', 'valid', and 'tstid' id arrays).
split_data

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNX86, Created on: Thu Feb 19 17:38:58 2009',
 '__version__': '1.0',
 '__globals__': [],
 'trnid': array([[6765, 6755, 6768, ..., 8026, 8036, 8041]], dtype=uint16),
 'valid': array([[6773, 6767, 6739, ..., 8028, 8008, 8030]], dtype=uint16),
 'tstid': array([[6734, 6735, 6737, ..., 8044, 8045, 8047]], dtype=uint16)}

In [4]:
# Number of image ids in the 'trnid' list.
len(split_data['trnid'][0])

1020

In [5]:
# Number of image ids in the 'valid' list.
len(split_data['valid'][0])

1020

In [34]:
# Although this list is named 'tstid', it holds the most images of the three
# lists, so this notebook treats it as the training set.
len(split_data['tstid'][0])

6149

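The cells below compute the zero-based image indices for the train, test, and validation splits. Note that the `tstid` list is used as the training split and the `trnid` list as the test split.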

In [7]:
# The 'tstid' list is used as the training split. np.unique sorts and
# de-duplicates the ids; subtracting one turns them into 0-based indices.
train_indices = np.unique(split_data['tstid'][0]) - 1
train_indices

array([   0,    1,    2, ..., 8185, 8187, 8188], dtype=uint16)

In [8]:
# The 'trnid' list is used as the test split. np.unique sorts and
# de-duplicates the ids; subtracting one turns them into 0-based indices.
test_indices = np.unique(split_data['trnid'][0]) - 1
test_indices

array([  27,   35,   78, ..., 8166, 8174, 8176], dtype=uint16)

In [9]:
# Validation split. np.unique sorts and de-duplicates the ids; subtracting one
# turns them into 0-based indices.
valid_indices = np.unique(split_data['valid'][0]) - 1
valid_indices

array([  16,   22,   37, ..., 8181, 8184, 8186], dtype=uint16)

In [10]:
# Directory names used when sorting the images into splits.
source_dir = 'jpg'    # holds all the images before they are sorted
train_dir = 'train'   # destination for the training images
valid_dir = 'valid'   # destination for the validation images
test_dir = 'test'     # destination for the test images

In [11]:
# Label array: the first row of the 'labels' entry of the label file.
labels = label_data['labels'][0]
labels

array([77, 77, 77, ..., 62, 62, 62], dtype=uint8)

In [12]:
# One (initially empty) label list per split; filled while the images are sorted.
train_labels, valid_labels, test_labels = [], [], []

In [13]:
# Sort the images into their split directories and collect the matching labels.
#
# `labels[i]` is looked up by position in the directory listing, so the listing
# must come back in a deterministic order. os.listdir() returns entries in
# arbitrary order, hence the explicit sort.
# NOTE(review): assumes the file names sort in the same order as the label
# array (e.g. zero-padded numeric names) - verify against the dataset.
image_files = sorted(os.listdir(source_dir))

# Positional lookup is only valid while every image is still in source_dir.
# An empty listing (e.g. this cell already ran) is a no-op; a partial listing
# would silently pair files with the wrong labels, so fail loudly instead.
if image_files and len(image_files) != len(labels):
    raise ValueError(
        f"{source_dir!r} holds {len(image_files)} files but there are "
        f"{len(labels)} labels; positional label lookup would be misaligned"
    )

# (index set, destination directory, label list) per split, checked in the
# same priority order as the original if/elif chain: train, test, valid.
# Sets make each membership test O(1) instead of a scan over a NumPy array.
split_targets = [
    (set(train_indices.tolist()), train_dir, train_labels),
    (set(test_indices.tolist()), test_dir, test_labels),
    (set(valid_indices.tolist()), valid_dir, valid_labels),
]

# shutil.move needs the destination directories to exist.
for _, split_dir, _ in split_targets:
    os.makedirs(split_dir, exist_ok=True)

for i, filename in enumerate(image_files):
    for split_indices, split_dir, split_labels in split_targets:
        if i in split_indices:
            shutil.move(os.path.join(source_dir, filename), os.path.join(split_dir, filename))
            split_labels.append(labels[i])
            break  # an image belongs to at most one split (first match wins)

In [16]:
# Turn the collected per-split label lists into NumPy arrays.
train_labels, valid_labels, test_labels = (
    np.array(split_labels)
    for split_labels in (train_labels, valid_labels, test_labels)
)

### Modeling

In [29]:
# ResNet50 with ImageNet weights and without its top classification layers,
# fed by a 224x224 RGB input tensor.
model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_tensor=Input(shape=(224, 224, 3)),
)
model

<keras.engine.functional.Functional at 0x2202442a4f0>

In [30]:
# Freeze every layer of the current `model` so its weights are not updated
# during training.
# NOTE(review): `model` is rebound to the full classifier in a later cell, so
# re-running this cell after that point would also freeze the new head and
# stack a second head on top - run the notebook top to bottom.
for layer in model.layers:
    layer.trainable = False

# New head: global average pooling -> 256-unit ReLU layer -> 102-way softmax
# (102 outputs, presumably one per flower class - confirm against the labels).
x = tf.keras.layers.GlobalAveragePooling2D()(model.output)
x = Dense(units=256, activation='relu')(x)
predictions = Dense(units=102, activation='softmax')(x)

In [31]:
# Wrap the existing model's input and the new softmax output into a single
# model; this rebinds `model` to the combined network.
model = Model(
    inputs=model.input,
    outputs=predictions,
)

In [32]:
# Compile with the Adam optimizer and track accuracy.
# NOTE(review): 'categorical_crossentropy' expects one-hot targets, while the
# label arrays built earlier hold integer class ids. Either one-hot encode them
# before fitting or switch to 'sparse_categorical_crossentropy' with 0-based
# ids - confirm against the training cell.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)