In [16]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import keras
from keras.layers import convolutional, Dense, Activation,pooling
from keras.layers import Input, merge, Convolution2D, MaxPooling2D, UpSampling2D, Flatten, Dropout
from keras.optimizers import Adam, SGD
from keras.models import Sequential, Model
from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint, LearningRateScheduler
from keras.regularizers import l2
from keras import backend as K
from keras.utils import np_utils
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
import cPickle as pickle
import scipy.io
import numpy as np
import h5py

config = tf.ConfigProto()
#config.gpu_options.per_process_gpu_memory_fraction = 1.0
set_session(tf.Session(config=config))



###################################### LOADING DATA ##############################

### Create a dictionary that contains all the dictiories within each dataset (workspace) we want to load ###

#Go to the data directory
dataDir = "/scratch/barbieri/DATA_CNN_networks/Data/"
Load_Data = {}  
for i in range(0, len(os.listdir( dataDir ))):  
    Load_Data[i] = {}  #Dictionary for a single workspace

    
### lOAD THE DATA STORING THEM IN A DICTIONARY ###
i = 0
for file in os.listdir( dataDir ):
    Load_Data[i]['File'] =  scipy.io.loadmat( dataDir+file )
    i = i+1

    
#### Define another dictionary for storing intensity curves and classes ### 
#From each dataset, the intensity curve and classes are extracted and stored in another dictionary
DATA = {}
N_of_file =len(os.listdir( dataDir ))
for i in range(0, N_of_file):
        DATA[i] = {}


#### Fill the dictionary ###
Ncell_classified = 200 #Number of classified cells per dataset
for i in range(0, N_of_file):
    #Decision array for each dataset
    DATA[i]['decision'] = Load_Data[i]['File']['decisionlist']
    #Size of the matrix to store intensity values
    NumofData = len(Load_Data[i]['File']['RAWDATA'][0,0]['files'][0,0][0]['intensity'])
    Int_curve_len = len(Load_Data[4]['File']['RAWDATA'][0,0]['files'][0,0][0]['intensity'][0][0])
    #Empty matrix to store all the intensity values of all the cells
    DATA[i]['intensity'] = np.zeros((Ncell_classified,  Int_curve_len ))
    for k in range(0,Ncell_classified): #Only the first 200 were classified
        DATA[i]['intensity'][k,:] = Load_Data[i]['File']['RAWDATA'][0,0]['files'][0,0][0]['intensity'][k][0,0:900] #copy the first 900 values

        
### Classes array: contain values from all the data ###
Classes=[]
for i in range( 0, N_of_file ):
    Classes = np.append(Classes , DATA[i]['decision']).astype(int)

    
### Intensities matrix: contain values from all the data    
NTot_Data = N_of_file*Ncell_classified
Intensity = np.zeros((0, DATA[0]['intensity'].shape[1]))
for i in range( 0, N_of_file ):
    Intensity = np.append(Intensity, DATA[i]['intensity'][:,:], axis=0)

    
### Elimate intensity profiles with NaN values and corresponding classes ###
nan_indices = np.where(np.isnan(Intensity))
Intensity = np.delete(Intensity,nan_indices[0], axis = 0)
Classes = np.delete(Classes,nan_indices[0], axis = 0)
#Redefine the total number of data
total = len(Intensity)

# DUPLICATE Intensity profiles adding noise to the second group
print(total)
Intensity_more_curves_noise = np.zeros((2*total,  Int_curve_len ))
Intensity_more_curves_noise[0:990,:] = Intensity
Intensity_more_curves_noise[990:1980,:] = Intensity + np.random.randn(Int_curve_len)*0.01

#Redefine the total number of data
total = len(Intensity_more_curves_noise)


#DUPLICATE CLASSES
Classes = np.append(Classes, Classes)
print(Classes.shape)


### Downsampling
data1 = Intensity_more_curves_noise[:,0:768]
step = 3
#256 points
data_downsampl = data1[:,::step]
#Redefine len of the Intensity curves:
Int_profile_len = len(data_downsampl[0])

#Check if there are typo errors for the classes (we only have 6 classes going from 0 to 5)
for i in range(len(Classes)):
    if Classes[i] > 5:
        print(i)
        
        
### Distribute all values randomly ###
randseq = np.random.choice(np.arange(0,total),total,replace=False)
#Data randomly organized
data_downsampl = data_downsampl[randseq,:]
Classes = Classes[randseq]
Classes_to_categorical = np_utils.to_categorical(Classes)

######################### load file - training data ####################
train_dt = data_downsampl[0:800,:]
train_cl = Classes_to_categorical[0:800]
X = train_dt  
y = train_cl 
N_train_cell = len(train_dt)
# process the data to fit in a keras CNN properly
# input data needs to be (N, C, X, Y) - shaped where
# N - number of samples
# C - number of channels per sample
# (X, Y) - sample size

X = X.reshape((N_train_cell, Int_profile_len,1, 1))


######################### load file - testing data ####################
#Define variables 
N_test_cell = total - N_train_cell
#Matrix for intensity values
test_dt = np.zeros((N_test_cell,Int_profile_len))
#Use as testing data the last N_test_cell values within the total 
test_dt[:,:]= data_downsampl[N_train_cell:total]
#Same process for the associated classes
test_cl = Classes_to_categorical[N_train_cell:total]

990
(1980,)


In [17]:
# How to load and use weights from a checkpoint

# define a CNN
# see http://keras.io for API reference

cnn = Sequential()
cnn.add(Convolution2D(64, 3, 1,border_mode="same",activation="relu",input_shape=(Int_profile_len,1, 1)))
cnn.add(Convolution2D(64, 3, 1, border_mode="same", activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2,1)))

cnn.add(Convolution2D(128, 3, 1, border_mode="same", activation="relu"))
cnn.add(Convolution2D(128, 3, 1, border_mode="same", activation="relu"))
cnn.add(Convolution2D(128, 3, 1, border_mode="same", activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2,1)))
    
cnn.add(Convolution2D(256, 3, 1, border_mode="same", activation="relu"))
cnn.add(Convolution2D(256, 3, 1, border_mode="same", activation="relu"))
cnn.add(Convolution2D(256, 3, 1, border_mode="same", activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2,1)))
    
cnn.add(Flatten())
cnn.add(Dense(1024, activation="relu"))
cnn.add(Dropout(0.5))
cnn.add(Dense(6, activation="softmax"))



In [18]:
# load weights
cnn.load_weights("/scratch/barbieri/DATA_CNN_networks/Different_decay_val/iteration1/06_06_2017_Noise1800data_weights.best_DecayValue0.001Iter0.hdf5")
# Compile model (required to make predictions)
#cnn.compile(loss="categorical_crossentropy", optimizer=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.001), metrics=['accuracy'])
print("Created model and loaded weights from file")

Created model and loaded weights from file


In [34]:
scores = cnn.evaluate(test_dt, test_cl, verbose=0)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

RuntimeError: You must compile a model before training/testing. Use `model.compile(optimizer, loss)`.

In [27]:
# estimate accuracy on whole dataset using loaded weights
out = cnn.predict(test_dt.reshape((N_test_cell, Int_profile_len,1, 1)))  

print(out.shape)
print(np.argmax(out,1))
#print((float(np.sum(np.argmax(out,0)==test_cl))/float(test_cl.shape[0])*100.0))

(1180, 6)
[2 1 2 ..., 4 4 2]
