In [1]:
import numpy as np
import keras as k
import json
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [2]:
"""
This function reads all of the files and lables from the json file then opens those files and converts the
images to np arrays to be fed into the nn.
NOTE: this is not a good thing to do if your dataset is too big, this may crash your computer
"""
def imagegen():
    f = open('InfraredSolarModules/module_metadata.json',) 
    data = json.load(f) 
    f.close() 
    img = []
    cat = []
    for i in range(20000):
        img.append(np.array(k.preprocessing.image.load_img('InfraredSolarModules/' + data[str(i)]['image_filepath'], color_mode = "grayscale")))
        cat.append(data[str(i)]['anomaly_class'])
    return np.array(img), np.array(cat)

In [3]:
img, labels = imagegen() # load the data and labels
img = img.reshape(20000, 40, 24, 1) # reshape to show color channels
img = img / 255 # scale data

In [4]:
X_train, X_test, y_train, y_test = train_test_split(img, labels, test_size=0.3)
labelencoder = LabelEncoder() # Make a label encoder 
y_train = labelencoder.fit_transform(y_train) # encode labels to numbers
y_test = labelencoder.transform(y_test)
y_train = k.utils.to_categorical(y_train) # encode nubers to catagorical labels
y_test = k.utils.to_categorical(y_test)

In [5]:
X_train.shape

(14000, 40, 24, 1)

In [6]:
y_train.shape

(14000, 12)

In [7]:
# TODO: Upsample anomalies that don't happen often
counts = {}
for i in y_train:
    try:
        counts[str(i)] += 1
    except:
        counts[str(i)] = 0

In [8]:
counts

{'[0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]': 6985,
 '[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]': 1166,
 '[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]': 171,
 '[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]': 1333,
 '[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]': 1066,
 '[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]': 889,
 '[0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]': 544,
 '[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]': 645,
 '[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]': 751,
 '[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]': 173,
 '[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]': 146,
 '[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]': 119}

In [9]:
for i in range(len(y_train)):
    if counts[str(y_train[i])] < 1000:
        new_X_train = np.array([np.fliplr(X_train[i]), np.flipud(X_train[i]), np.flipud(np.fliplr(X_train[i]))])
        new_y_train = np.array([y_train[i], y_train[i], y_train[i]])
        X_train = np.concatenate((X_train, new_X_train), axis=0)
        y_train = np.concatenate((y_train, new_y_train), axis=0)

In [10]:
X_train.shape

(24338, 40, 24, 1)

In [11]:
y_train.shape

(24338, 12)

In [48]:
model = k.Sequential()
model.add(k.layers.Conv2D(filters=40, kernel_size=(5,5), strides=(1, 1), padding="valid", input_shape=(40, 24, 1)))
model.add(k.layers.Conv2D(filters=40, kernel_size=(3,3), strides=(1, 1), padding="valid"))
model.add(k.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2), padding="same"))
model.add(k.layers.Flatten())
model.add(k.layers.Dense(450))
model.add(k.layers.Activation('relu'))
model.add(k.layers.Dense(12))
model.add(k.layers.Activation('softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["accuracy"])
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_23 (Conv2D)           (None, 36, 20, 40)        1040      
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 34, 18, 40)        14440     
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 17, 9, 40)         0         
_________________________________________________________________
flatten_9 (Flatten)          (None, 6120)              0         
_________________________________________________________________
dense_18 (Dense)             (None, 450)               2754450   
_________________________________________________________________
activation_18 (Activation)   (None, 450)               0         
_________________________________________________________________
dense_19 (Dense)             (None, 12)              

In [49]:
model.fit(X_train, y_train, epochs=20, batch_size=20, verbose=1, shuffle=True)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x148ef84f0>

In [50]:
loss, acc = model.evaluate(X_test, y_test, verbose=1)
print(f"Prediction loss, accuracy: {loss, acc}")

Prediction loss, accuracy: (2.179806709289551, 0.6996666789054871)


In [15]:
model.save("model.h5")

In [None]:
model = load_model('model.h5')