In [1]:
import pandas as pd
import numpy as np
import glob
import os
import tensorflow as tf
from tensorflow.keras import layers, models

def excel_reader(folder_path):
    """Load every ``.xlsx`` file in a folder into a single 3D numpy array.

    Files are read in sorted path order so the order of samples in the
    result is reproducible; ``glob.glob`` itself gives no ordering guarantee.

    Args:
        folder_path: Path of the folder that contains the ``.xlsx`` files.

    Returns:
        numpy array of shape ``(n_files, n_rows, n_cols)`` holding one 2D
        sheet per file.

    Raises:
        FileNotFoundError: If the folder contains no ``.xlsx`` files.
        ValueError: If the sheets do not all share the same shape (raised by
            ``np.stack``).
    """
    files = sorted(glob.glob(os.path.join(folder_path, "*.xlsx")))
    if not files:
        # Fail with a clear message instead of an unbound-variable error later on.
        raise FileNotFoundError(f"no .xlsx files found in {folder_path!r}")
    sheets = [pd.read_excel(file).to_numpy() for file in files]
    # np.stack adds the leading "file" axis and rejects mismatched sheet shapes.
    return np.stack(sheets)

def listdir_non_hidden(folder_path):
    """Return the sorted names of the non-hidden sub-folders of ``folder_path``.

    Entries whose name starts with ``'.'`` are skipped, and so are plain
    files, so only real folders are returned. The result is sorted because
    ``os.listdir`` returns entries in arbitrary order.

    Args:
        folder_path: Path of the directory to scan.

    Returns:
        List of folder names (not full paths), in sorted order.
    """
    return sorted(
        name
        for name in os.listdir(folder_path)
        if not name.startswith('.')
        and os.path.isdir(os.path.join(folder_path, name))
    )
    

In [2]:
path = "../data/training data/matrix-data/" # input path to data folder here

# gather training datas & labels
# Sort the class folders so the folder -> label-index mapping is identical on
# every run and machine (directory listing order is arbitrary).
folders = sorted(listdir_non_hidden(path))
datas = [excel_reader(os.path.join(path, folder)) for folder in folders]
train_data = np.vstack(datas)
# Every sample is labelled with the index of the folder it was loaded from.
train_labels = np.asarray(
    [label for label, data in enumerate(datas) for _ in range(len(data))]
)

# normalize train_data to [0, 1] with min-max scaling
# (x - min) / (max - min) is correct for any sign of min_val; the previous
# (x + |min|) / (max + |min|) form only matched it when min_val <= 0.
# NOTE(review): data normalized elsewhere for this model must use the same formula.
max_val = train_data.max()
min_val = train_data.min()
value_range = max_val - min_val
# Constant data would divide by zero; dividing by 1 maps it to all zeros instead.
train_data = (train_data - min_val) / (value_range if value_range != 0 else 1)

# add a trailing channel axis so each sample is (rows, cols, 1)
train_data = np.expand_dims(train_data, axis=-1)

In [3]:
# input into model
# Per-sample input shape, taken from the training array (axis 0 is the sample axis).
x,y,z = train_data.shape[1:]
n_classes = len(datas)  # one class per loaded data folder

# The input shape is declared once, up front. Sequential infers the shape of
# every later layer, so passing input_shape to them is ignored at best and
# misleading at worst (the old Flatten shape had the wrong rank).
model = tf.keras.Sequential([
    tf.keras.Input(shape=(x, y, z)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(n_classes*2, activation='relu'),
    layers.Dense(n_classes)  # raw logits; the loss below applies from_logits=True
])


model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

#model.fit(train_data, train_labels, epochs=8, validation_split = 0.3)

In [4]:
# Print the layer-by-layer output shapes and parameter counts of the model built above.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 424, 26, 32)       320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 212, 13, 32)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 210, 11, 64)       18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 105, 5, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 103, 3, 64)        36928     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 51, 1, 64)        0