In [1]:
# model library
import tensorflow as tf

# image processing libraries
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from PIL import Image
import numpy as np

# folder reading libraries
import glob
import os


### Loading data

In [2]:
def gather_png_data(folder_path, crop_box=(30, 0, 400, 390)):
    """Load every PNG in a folder as a cropped, inverted grayscale array.

    Parameters
    ----------
    folder_path : str
        Folder that is searched (non-recursively) for ``*.png`` files.
    crop_box : tuple of int, optional
        ``(left, top, right, bottom)`` pixel box handed to ``Image.crop``.
        The default is the hand-picked box this notebook has always used.

    Returns
    -------
    numpy.ndarray
        Stack of images with the sample axis first, i.e.
        ``(n_images, bottom - top, right - left)`` for a regular crop box.
        Pixel values are rescaled so that 0 is white and 1 is black.
        An empty folder yields an array with zero samples.
    """
    left, top, right, bottom = crop_box

    # glob returns files in an OS-dependent order; sorting keeps the later
    # "first N samples go to validation" split reproducible between runs.
    files = sorted(glob.glob(os.path.join(folder_path, "*.png")))
    if not files:
        # Keep the image dimensions so the result can still be stacked with
        # other categories (float32 presumably matches img_to_array's default).
        return np.empty((0, bottom - top, right - left), dtype="float32")

    images = []
    for path in files:
        img = load_img(path, color_mode="grayscale")
        img = img.crop((left, top, right, bottom))  # cropping is better for model & training
        # PIL image -> numpy array (255 is white, 0 is black). Squeeze each image
        # on its own: squeezing the whole stack would also remove the sample axis
        # when the folder contains exactly one image.
        images.append(np.squeeze(img_to_array(img)))

    stacked = np.array(images)
    # Map to 0-1 with 0 = white and 1 = black, since the white background
    # carries no information for the model.
    return -(stacked / 255) + 1

def listdir_non_hidden(folder_path):
    """Return the names of the non-hidden sub-folders of ``folder_path``.

    Entries whose name starts with ``'.'`` are skipped, and so are plain
    files, so that a stray file next to the category folders is not mistaken
    for a category. The names are returned sorted because ``os.listdir``
    gives no ordering guarantee and the position of a folder in this list is
    used as its class label.

    Parameters
    ----------
    folder_path : str
        Folder whose immediate children are listed.

    Returns
    -------
    list of str
        Sorted folder names (not full paths).
    """
    return sorted(
        name
        for name in os.listdir(folder_path)
        if not name.startswith('.')
        and os.path.isdir(os.path.join(folder_path, name))
    )

# Folder holding one sub-folder of training images per category, relative to
# this notebook. Keep the trailing "/" because folder names are appended to it.
rel_folder_path = "../data/training data/data (E,v,lambda)/Line Plots/"
categories = listdir_non_hidden(rel_folder_path)

# One image stack per category folder, in the same order as `categories`.
data = [gather_png_data(rel_folder_path + category) for category in categories]

# Self-made 1-based reference numbers for the categories: the value k refers
# to the k-th entry of `categories`, not to list index k.
class_names = list(range(1, len(data) + 1))

### Processing data

In [3]:
split_percent = 30  # percentage of every category that is held out for validation

validation = []
train = []
validation_labels = []
train_labels = []

for label, samples in enumerate(data):
    # The leading `n_held_out` samples of a category become validation data,
    # everything after them is used for training.
    n_held_out = round(samples.shape[0] * split_percent / 100)
    validation.append(samples[:n_held_out])
    train.append(samples[n_held_out:])

    # Each entry of `data` is its own category, so its index is the class label.
    validation_labels.extend([label] * n_held_out)
    train_labels.extend([label] * (len(samples) - n_held_out))

# The model expects numpy arrays for the labels.
train_labels = np.array(train_labels)
validation_labels = np.array(validation_labels)

# Stack the per-category pieces along axis 0 into one training and one validation set.
train_data = np.vstack(train)
validation_data = np.vstack(validation)

# Image dimensions (axes 1 and 2 of the stacked data), used as the model's input shape.
x, y = train_data.shape[1], train_data.shape[2]

In [4]:
train_data.shape

(3696, 390, 370)

### Model training

In [5]:
n_classes = len(data)  # one output unit per category

# Small fully-connected classifier; the hidden width (twice the number of
# classes) was picked by trial and error based on validation accuracy.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(x, y)))
model.add(tf.keras.layers.Dense(n_classes * 2, activation='relu'))
model.add(tf.keras.layers.Dense(n_classes))  # raw logits, no softmax

# from_logits=True matches the final Dense layer, which has no activation.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

model.fit(train_data, train_labels, epochs=8)

# verbose=2 prints a single summary line so it is visible that evaluation ran.
validate_loss, validate_acc = model.evaluate(
    validation_data,
    validation_labels,
    verbose=2,
    batch_size=32,
)
print('\nValidation accuracy:', validate_acc)


Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
50/50 - 0s - loss: 0.0695 - accuracy: 0.9824 - 410ms/epoch - 8ms/step

Validation accuracy: 0.9823566675186157


In [1]:
# Print the layer-by-layer summary of `model`. `model` is created in the
# "Model training" cell, so that cell must have been run in the current
# kernel before this one (otherwise this raises NameError).
model.summary()

NameError: name 'model' is not defined

### Model save

In [7]:
# Destination folder for the exported model, relative to this notebook.
model_folder = '../model'
# Build the save path from `model_folder` so the folder is configured in one
# place; this resolves to '../model/cyclic_voltammogram_model'.
model.save(model_folder + '/cyclic_voltammogram_model')

INFO:tensorflow:Assets written to: ../model/cyclic_voltammogram_model\assets
