In [None]:
# General data manipulation
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import cv2
import os
import sys
import time
from PIL import Image # pip install pillow
from keras.utils import np_utils

# Deep learning setup (NN, CNN)
from keras.models import Sequential 
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D, Conv2D, AveragePooling2D

# Model evaluation
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from keras.utils import to_categorical

In [None]:
def img2arr(img):
    """Return the first channel of an image as a 2-D ``uint8`` array.

    ``img`` must expose ``getdata()``, ``height`` and ``width`` (as PIL
    images do). The flat pixel sequence is reshaped to
    ``(height, width, channels)`` and only channel 0 is kept.
    """
    flat_pixels = np.asarray(img.getdata(), dtype=np.uint8)
    pixel_cube = flat_pixels.reshape(img.height, img.width, -1)
    first_channel = pixel_cube[:, :, 0]
    return first_channel

from scipy.ndimage import *

def getImg(full_img, k=288, shuffle=False, apply_filter=False):
    """Crop a square window centred on pixel (288, 288) and flatten the baseline.

    Parameters
    ----------
    full_img : 2-D numpy array
        Source frame. The crop centre is fixed at row/column 288, so a
        576x576 frame is presumably expected (TODO confirm against the data).
    k : int, default 288
        Half-width of the window; the result is at most ``2*k`` pixels
        per side and is truncated at the image border.
    shuffle : bool, default False
        Currently unused; kept so existing callers keep working.
    apply_filter : bool, default False
        If True, replace the crop with the grey-scale closing (size 9) of
        its Gaussian gradient magnitude (sigma 5).

    Returns
    -------
    numpy.ndarray
        Float copy of the crop with each column's median subtracted
        (then filtered, if requested). ``full_img`` is not modified.
    """
    center = 288
    # Clamp the lower bound: a negative start index is interpreted by NumPy
    # as "count from the end", which silently returned a small corner
    # sliver whenever k exceeded the centre offset.
    start = max(center - k, 0)
    stop = center + k
    img = full_img[start:stop, start:stop].astype(float)
    # Subtract the per-column median to remove the column-wise baseline.
    img -= np.median(img, axis=0)
    if apply_filter:
        img = grey_closing(gaussian_gradient_magnitude(img, 5), 9)
    return img

In [None]:
# %%time
# DIR = os.getcwd() + '/idao_dataset'

# # get train file names and convert to dataframe
# # you may need to change your working directory first:
# # os.chdir('/your_path')
# ER_file_names = os.listdir(DIR + '/train/ER/')
# NR_file_names = os.listdir(DIR + '/train/NR/')

# ER = pd.DataFrame([[y.replace(';1.png','').replace('ev','') for y in x.split('_')] + [x] for x in ER_file_names])
# NR = pd.DataFrame([[y.replace(';1.png','').replace('ev','') for y in x.split('_')] + [x] for x in NR_file_names])

# # only few columns have distinct values
# # print(ER.apply(lambda x: len(x.unique())))
# # print(NR.apply(lambda x: len(x.unique())))

# ER = ER[[5,6,0,15,16,17]].rename(columns={5:'type',6:'energy',0:'num',15:'run',16:'ev',17:'path'})
# NR = NR[[6,7,0,17,18,19]].rename(columns={6:'type',7:'energy',0:'num',17:'run',18:'ev',19:'path'})
# data = ER.append(NR, ignore_index=True)
# data = data.astype(dict(zip(data.columns,[str,int,float,str,int,str])))
# data['type_2'] = (data.type == 'ER')*1
# data.head()

In [None]:
%%time
# Load the two pre-built event tables from pickle files in the working directory.
# Later cells read the `type_2_ER`, `energy` and `img_array` columns from them.
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
er_data = pd.read_pickle('er_data(1-6758).pkl')
nr_data = pd.read_pickle('nr_data(1-6646).pkl')

In [None]:
%%time
# Stack both tables into a single frame with a fresh 0..n-1 index.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported equivalent and is what later cells already use.
#
# Subset for quick experiments:
# input_df = pd.concat([er_data[:2000], nr_data[:2000]], ignore_index=True)
input_df = pd.concat([er_data, nr_data], ignore_index=True)

In [None]:
%%time
# N = 250
# k = 64
# img_list = []
# for i in range(len(input_df)):
#     img_list.append(getImg(input_df['img_array'][i], k=k, apply_filter=True))
# target_data = er_data['type_2_ER'][:2000].append(nr_data['type_2_ER'][:2000])

N = 250  # NOTE(review): unused below - the per-class cap actually applied is 300; confirm which is intended
k = 64   # half-width of the crop -> 128x128 network inputs

# type_2_ER flag -> energies to sample (compared as strings against the `energy` column); ER = 1
in_sample = {1: ['3', "10", "30"], 0: ["1", "6", '20']}

# Take at most 300 rows for every (type, energy) combination.
selected_frames = []
for Type, energies in in_sample.items():
    for Energy in energies:
        print(Type, Energy)
        mask = (input_df["type_2_ER"] == Type) & (input_df["energy"] == Energy)
        selected_frames.append(input_df[mask][:300])
img_list_tmp = pd.concat(selected_frames, ignore_index=True)

# Labels are taken from the same concatenated frame as the images, so the two
# stay row-aligned by construction; astype(int) replaces the per-element cast
# loop and yields a proper integer Series instead of an object one.
target_data = img_list_tmp['energy'].astype(int)

# Each crop is wrapped in a one-element list (as before), giving an
# (n, 1, 128, 128) stack that the next cell reshapes to (n, 128, 128, 1).
img_list = [
    [getImg(frame, k=k, apply_filter=True)]
    for frame in img_list_tmp['img_array']
]

    
    
# M = 10
# in_sample = {'ER': [3,10,30], 'NR': [1,6,20]}
# predicts_in = pd.DataFrame()
# imgs_in = []
# i = 0
# for _ in range(M):
#     for Type in in_sample:
#         for Energy in in_sample[Type]:
#             predicts_in = predicts_in.append(pd.DataFrame({'type':Type,'energy':Energy},index=[i]))
#             i += 1
# for i in range(len(predicts_in)):
#     Type, Energy = predicts_in.iloc[i]
#     img = getImg(Type, Energy, k=64, shuffle=True, apply_filter=True)
#     imgs_in.append(img)
# imgs_in = np.array(imgs_in).reshape((len(predicts_in),128,128,1))
# predicts_in['predict']=['ER' if x==1 else 'NR' for x in model_1.predict(imgs_in).round()]
# predicts_in['correct'] = predicts_in['type'] == predicts_in['predict']

In [None]:
# Stack the crops and lay them out as (n_samples, 128, 128, 1): a single
# trailing channel axis, matching the input_shape the CNN is built with.
img_array = np.array(img_list).reshape(len(img_list), 128, 128, 1)
print(img_array.shape)



In [None]:
# One-hot encode the integer energies into 31 columns (class index == energy
# value, so indices 0..30 cover the largest energy sampled above).
energy_column = np.array(target_data).reshape(len(target_data), 1)
target_data = to_categorical(energy_column, 31)
print(target_data.shape)

In [None]:
# Train and Test data Split
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples. A fixed random_state keeps the split - and
# therefore every metric computed from it - reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    img_array, target_data, test_size=0.3, random_state=42)

# Sanity check: images and labels must have matching lengths within each split.
print(len(x_train))
print(len(y_train))
print(len(x_test))
print(len(y_test))

In [None]:
# CNN for graphic data
# #create model
# cnn_type = Sequential()
# #add model layers
# cnn_type.add(Conv2D(16, kernel_size=9, activation='relu', input_shape=(128,128,1)))
# cnn_type.add(MaxPooling2D(pool_size=(2, 2)))
# cnn_type.add(Conv2D(32, kernel_size=5, activation='relu'))
# # cnn_type.add(Dropout(0.5))
# cnn_type.add(MaxPooling2D(pool_size=(2, 2)))
# cnn_type.add(Conv2D(64, kernel_size=3, activation='relu'))
# cnn_type.add(MaxPooling2D(pool_size=(2, 2)))
# # cnn_type.add(Conv2D(128, kernel_size=3, activation='relu'))
# cnn_type.add(Flatten())
# cnn_type.add(Dense(128, activation='relu'))
# cnn_type.add(Dense(1, activation='sigmoid')) # He, e
# # cnn_type.add(Dense(2, activation='softmax'))

# cnn_type.compile(optimizer='adam',
# #     loss='categorical_crossentropy',
#     loss='binary_crossentropy',
#     metrics=['accuracy'])


# Energy classifier: three conv/max-pool stages (16 -> 32 -> 64 filters with
# shrinking kernels 9 -> 5 -> 3), then a 128-unit dense layer and a 31-way output.
cnn_energy_er = Sequential([
    Conv2D(16, kernel_size=9, activation='relu', input_shape=(128, 128, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(32, kernel_size=5, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    # The targets are one-hot and the loss is categorical cross-entropy, so the
    # outputs must form a probability distribution over the classes: softmax,
    # not independent per-unit sigmoids.
    Dense(31, activation='softmax'),
])

# The metric is left as mean_absolute_error because later cells read
# history['mean_absolute_error'].
cnn_energy_er.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['mean_absolute_error'],
)

In [None]:
%%time 
# cnn_type_model = cnn_type.fit(x_train, y_train, 
#                               batch_size=64,
#                               epochs=10,
#                               # verbose=1,
#                               validation_data=(x_test, y_test))

# Train for 10 epochs in mini-batches of 64; the held-out split is scored
# after every epoch. Note that fit() returns a History object (not a model),
# whose .history dict holds the per-epoch metrics.
cnn_energy_er_model = cnn_energy_er.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
# plt.plot(cnn_type_model.history['accuracy'])
# plt.plot(cnn_type_model.history['val_accuracy'])

# Per-epoch mean absolute error for the training data and, when recorded,
# the validation data, on one labelled axis.
history = cnn_energy_er_model.history
fig, ax = plt.subplots()
ax.plot(history['mean_absolute_error'], label='train')
if 'val_mean_absolute_error' in history:
    ax.plot(history['val_mean_absolute_error'], label='validation')
ax.set(xlabel='Epoch', ylabel='Mean absolute error', title='Training history')
ax.legend()
plt.show()


In [None]:
# Show the raw per-epoch values returned by fit(), keyed by metric name.
cnn_energy_er_model.history

In [None]:
# Score the trained energy model on the held-out split. `cnn_type` only exists
# in commented-out code, so the model evaluated here is `cnn_energy_er`.
# evaluate() returns the loss followed by the compiled metrics, and the only
# compiled metric is mean_absolute_error - hence MAE, not accuracy.
score, mae = cnn_energy_er.evaluate(x_test, y_test)
print('Test score:', score)
print('Test MAE:', mae)

In [None]:
# Prediction on testing data
# y_pred = np.round(cnn_type.predict(x_test))
# cm = confusion_matrix(y_test,y_pred) # create a confusion matrix



# Per-class output scores for every held-out image: one row per sample,
# one column per unit of the 31-wide output layer.
y_pred = cnn_energy_er.predict(x_test)

In [None]:
# Display the one-hot encoded true labels of the held-out split.
y_test

In [None]:
# Display the raw model outputs for comparison with y_test.
y_pred

In [None]:
# Plot the confusion matrix
import seaborn as sns

# `cm` was only ever assigned in commented-out code, so it is built here.
# Both arrays are collapsed to class indices; because the labels were one-hot
# encoded from the integer energies, the index is the energy value itself.
true_energy = np.argmax(y_test, axis=1)
pred_energy = np.argmax(y_pred, axis=1)
# Restrict rows/columns to the classes that actually occur.
energy_labels = np.unique(np.concatenate([true_energy, pred_energy]))
cm = confusion_matrix(true_energy, pred_energy, labels=energy_labels)

fig, ax = plt.subplots()
sns.heatmap(cm,
            cmap=plt.cm.Blues,
            annot=True,
            annot_kws={"size": 12},  # annotation font size
            fmt="d",                 # counts are integers
            xticklabels=energy_labels,
            yticklabels=energy_labels,
            ax=ax)
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
ax.set_title('Confusion matrix of Neural Network Model')
plt.show()