In [11]:
import numpy as np
import keras as k
import json
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [25]:
"""
This function reads all of the files and lables from the json file then opens those files and converts the
images to np arrays to be fed into the nn.
NOTE: this is not a good thing to do if your dataset is too big, this may crash your computer
"""
def imagegen():
    f = open('InfraredSolarModules/module_metadata.json',) 
    data = json.load(f) 
    f.close() 
    img = []
    cat = []
    for i in range(20000):
        img.append(np.array(k.preprocessing.image.load_img('InfraredSolarModules/' + data[str(i)]['image_filepath'], color_mode = "grayscale")))
        cat.append(data[str(i)]['anomaly_class'])
    return np.array(img), np.array(cat)

In [26]:
img, labels = imagegen() # load the data and labels
img = img.reshape(20000, 40, 24, 1) # reshape to show color channels
img = img / 255 # scale data

In [27]:
X_train, X_test, y_train, y_test = train_test_split(img, labels, test_size=0.2)
labelencoder = LabelEncoder() # Make a label encoder 
y_train = labelencoder.fit_transform(y_train) # encode labels to numbers
y_test = labelencoder.transform(y_test)
y_train = k.utils.to_categorical(y_train) # encode nubers to catagorical labels
y_test = k.utils.to_categorical(y_test)

In [28]:
X_train.shape

(16000, 40, 24, 1)

In [29]:
y_train.shape

(16000, 12)

In [8]:
# TODO: Upsample anomalies that don't happen often
counts = {}
for i in y_train:
    try:
        counts[str(i)] += 1
    except:
        counts[str(i)] = 0

In [9]:
counts

{'[0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]': 603,
 '[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]': 887,
 '[0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]': 7023,
 '[0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]': 181,
 '[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]': 1046,
 '[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]': 660,
 '[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]': 1309,
 '[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]': 1112,
 '[0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]': 726,
 '[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]': 143,
 '[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]': 122,
 '[0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]': 176}

In [10]:
for i in range(len(y_train)):
    if counts[str(y_train[i])] < 1000:
        new_X_train = np.array([np.fliplr(X_train[i]), np.flipud(X_train[i]), np.flipud(np.fliplr(X_train[i]))])
        new_y_train = np.array([y_train[i], y_train[i], y_train[i]])
        X_train = np.concatenate((X_train, new_X_train), axis=0)
        y_train = np.concatenate((y_train, new_y_train), axis=0)

KeyboardInterrupt: 

In [17]:
X_train.shape

(14000, 40, 24, 1)

In [18]:
y_train.shape

(14000, 12)

In [46]:
model = k.Sequential()
model.add(k.layers.Conv2D(filters=15, kernel_size=(3,3), strides=(1, 1), padding="valid", input_shape=(40, 24, 1)))
model.add(k.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2), padding="same"))
model.add(k.layers.Conv2D(filters=15, kernel_size=(3,3), strides=(1, 1), padding="valid"))
model.add(k.layers.MaxPooling2D(pool_size=(2, 2), strides=(2,2), padding="same"))
model.add(k.layers.Flatten())
model.add(k.layers.Dense(675))
model.add(k.layers.Activation('relu'))
model.add(k.layers.Dense(12))
model.add(k.layers.Activation('softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["accuracy"])
model.summary()

Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_22 (Conv2D)           (None, 38, 22, 15)        150       
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 19, 11, 15)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 17, 9, 15)         2040      
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 9, 5, 15)          0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 675)               0         
_________________________________________________________________
dense_25 (Dense)             (None, 675)               456300    
_________________________________________________________________
activation_25 (Activation)   (None, 675)             

In [47]:
for i in range(20):
    print(i)
    model.fit(X_train, y_train, epochs=1, batch_size=20, verbose=1, shuffle=True)
    model.evaluate(X_test, y_test, verbose=1)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [50]:
loss, acc = model.evaluate(X_test, y_test, verbose=1)
print(f"Prediction loss, accuracy: {loss, acc}")

Prediction loss, accuracy: (2.179806709289551, 0.6996666789054871)


In [15]:
model.save("model.h5")

In [None]:
model = load_model('model.h5')