In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, Reshape
from keras.models import load_model
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sys import getsizeof
import random

In [None]:
# Read the serialized dataset array from disk and report its dimensions.
# NOTE(review): allow_pickle=True lets NumPy unpickle arbitrary Python objects,
# which can execute code - only load .npy files from a trusted source.
dataset = np.load(file="./dataset_win.npy", allow_pickle=True)
print(dataset.shape)

In [None]:
# Transpose the dataset and unpack it into two arrays:
# x (the samples fed to the model later on) and y (their labels).
x, y = dataset.transpose()

In [None]:
#Split into train and test datasets
# random_state is fixed so that a fresh "Restart & Run All" reproduces the
# same train/test partition (and therefore comparable evaluation numbers).
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.33, random_state=42)

# L1-normalise every row of every sample (axis=1), then stack the samples
# into a single array per split.
xtrain_norm = np.array([normalize(sample, axis=1, norm='l1') for sample in xtrain])
xtest_norm = np.array([normalize(sample, axis=1, norm='l1') for sample in xtest])

# Rebuild the label arrays from their elements so NumPy infers a dtype from the values.
ytrain = np.array(list(ytrain))
ytest = np.array(list(ytest))

print("Size of training dataset: " + str(xtrain_norm.shape))
print("Size of testing dataset:  " + str(xtest_norm.shape))
print("Size of unit class:       " + str(xtest[0].shape))
print("Labels in dataset:        " + str(np.unique(y)))

print(ytrain)
print(ytest)

In [None]:
# Small CNN classifier: reshape -> conv -> max-pool -> ReLU -> dense -> softmax.
#
# No input shape is declared: the original passed input_shape to the Conv2D
# layer, but Keras only honours that argument on the first layer of a
# Sequential model, so it was ignored and has been removed. The model is
# therefore built on its first call. Later cells rely on this by feeding both
# the stacked sample arrays and single samples reshaped to (1, 256, 256, 1);
# the Reshape layer turns either into (256, 256, 1) per sample.
model = Sequential()
model.add(Reshape((256, 256, 1)))  # append the channel axis Conv2D expects
model.add(Conv2D(64, kernel_size=(3, 3), name="conv"))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(128, activation="relu", name="layer1"))
# One softmax output per class label (9 labels, see get_str)
model.add(Dense(9, activation="softmax", name="layer2"))

In [None]:
# Configure training: sparse categorical cross-entropy loss (labels are given
# as class indices, not one-hot vectors), RMSprop optimiser, accuracy metric.
model.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the normalised training split for 10 epochs with mini-batches of
# 6 samples; the value returned by fit() is kept in `history`.
history = model.fit(
    x=xtrain_norm,
    y=ytrain,
    epochs=10,
    batch_size=6,
)

In [None]:
# Model.summary() prints the layer table itself and returns None, so it is
# called directly; wrapping it in print() emitted an extra "None" line.
model.summary()

In [None]:
# Score the trained model on the held-out test split; the returned value is
# shown as the cell output.
model.evaluate(x=xtest_norm, y=ytest)

In [None]:
def get_str(n):
    """Translate a numeric class label into its file-type name.

    Labels 0 through 8 map, in order, to GIF, PE64, MP3, FLAC, DLL, HTML,
    PDF, PNG and XML. Any other value yields None.
    """
    names = ("GIF", "PE64", "MP3", "FLAC", "DLL", "HTML", "PDF", "PNG", "XML")
    # Compare with == instead of indexing so that any value equal to a label
    # code is accepted (e.g. NumPy integer scalars).
    for code, label in enumerate(names):
        if n == code:
            return label
    return None


In [None]:
# List every test sample whose predicted class differs from its true label.
#
# Inference runs once over the whole test array (the same array that
# model.evaluate() receives) instead of one model.predict() call per sample.
test_probs = model.predict(xtest_norm)
test_pred_labels = test_probs.argmax(axis=1)

# The loop index is deliberately not named `x`: at notebook scope that would
# overwrite the sample array `x` and break a re-run of the split cell.
for idx, (actual, predicted) in enumerate(zip(ytest, test_pred_labels)):
    if actual != predicted:
        print(str(idx) + "\twas: " + get_str(actual) + "\tpredicted: " + get_str(predicted))

In [None]:
# Inspect one test sample: draw it as a heat map, then print the predicted
# and actual labels followed by the model output for every class.
image_index = 25
plt.figure(figsize = (10,10))
# vmax caps the colour scale at 0.02; larger values are drawn in the top colour
plt.imshow(xtest_norm[image_index],cmap='hot',vmax=0.02)

# A single forward pass supplies both the predicted label and the per-class
# listing below (the original ran model.predict twice on the same input).
pred = model.predict(xtest_norm[image_index].reshape(1, 256, 256, 1))
print("Prediction: " + get_str(pred.argmax()))
print("Actual:     " + get_str(ytest[image_index]))
print("Confidence Levels: ")
categories = ['gif','pe64','mp3','flac','dll','html','pdf','png','xml']

# Set once rather than on every loop iteration. This only affects how NumPy
# prints arrays; the values below are formatted explicitly with '{:.20f}'.
np.set_printoptions(precision=4)
for name, score in zip(categories, pred[0]):
    print(name + "\t = " + '{:.20f}'.format(score))