In [1]:
%pylab inline

import os
import scipy.io
import numpy as np
from keras.models import Sequential
from keras.layers import Convolution2D, Dense, Dropout, Flatten, MaxPooling2D
from keras.utils import np_utils
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint


Populating the interactive namespace from numpy and matplotlib


Using TensorFlow backend.


In [2]:
###################################### LOADING DATA ##############################

### Load every MATLAB workspace found in the data directory ###

# Directory that holds the workspaces (one file per dataset)
dataDir = "C:/Liliana/DATA_CNN_networks/"

# List the directory only once and in sorted order: os.listdir() returns the
# entries in arbitrary order, so sorting keeps the dataset numbering
# (Load_Data[0], Load_Data[1], ...) stable from one run to the next.
workspace_files = sorted(os.listdir(dataDir))

# Load_Data[i]['File'] is the dictionary returned by scipy.io.loadmat for the
# i-th workspace file.
Load_Data = {}
for i, file in enumerate(workspace_files):
    # os.path.join works whether or not dataDir ends with a path separator
    Load_Data[i] = {'File': scipy.io.loadmat(os.path.join(dataDir, file))}

# Debugging examples showing how to reach individual fields of a loaded workspace:
#print(Load_Data[0]['File']['decisionlist'])
#print(Load_Data[1]['File']['RESULTS']['STATISTICS'][0,0]['TotalNumCells'][0])
#print(Load_Data[1]['File']['RAWDATA'][0,0]['files'][0,0]['coords_x'].astype(double))
#print(Load_Data[1]['File']['RAWDATA'][0,0]['files'][0,0][0]['intensity'][0])  #intensity curve for first cell
#print(Load_Data[1]['File']['decisionlist'])  #decision list


#### Extract the intensity curves and the classes from every loaded workspace ###
# DATA[i]['decision']  : the 'decisionlist' array of workspace i
# DATA[i]['intensity'] : matrix with one row per classified cell, holding the
#                        leading samples of that cell's intensity curve

Ncell_classified = 200   #Number of classified cells per dataset (only the first 200 were classified)
MAX_CURVE_SAMPLES = 900  #At most this many leading samples of each curve are kept

DATA = {}
# Count the workspaces that were actually loaded instead of listing the
# directory again, so DATA and Load_Data always have the same keys.
N_of_file = len(Load_Data)
for i in range(0, N_of_file):
    workspace = Load_Data[i]['File']
    #Per-cell intensity curves of this workspace; entry k is indexed below as [0, start:stop]
    cell_curves = workspace['RAWDATA'][0,0]['files'][0,0][0]['intensity']

    #Number of cells stored in this workspace
    NumofData = len(cell_curves)
    if NumofData < Ncell_classified:
        raise ValueError(
            "Workspace %d holds %d intensity curves, but %d classified cells are expected"
            % (i, NumofData, Ncell_classified))

    #Row length: taken from THIS workspace's first curve (not from a fixed
    #workspace index) and capped at MAX_CURVE_SAMPLES, so the matrix width
    #always matches the slice copied into it.
    Int_curve_len = min(len(cell_curves[0][0]), MAX_CURVE_SAMPLES)

    DATA[i] = {}
    #Decision array for each dataset
    DATA[i]['decision'] = workspace['decisionlist']
    #Empty matrix to store the intensity values of all the classified cells
    DATA[i]['intensity'] = np.zeros((Ncell_classified, Int_curve_len))
    for k in range(0, Ncell_classified):
        #Copy the leading Int_curve_len samples of cell k; a curve shorter
        #than that raises a shape-mismatch error instead of being padded.
        DATA[i]['intensity'][k,:] = cell_curves[k][0, 0:Int_curve_len]

        
### Classes array: contain values from all the data ###
# Every decision array is flattened and the pieces are joined in dataset order.
# The leading empty array keeps the result a flat vector and gives the same
# values as the previous append-based construction.
Classes = np.concatenate(
    [np.zeros(0)] + [np.ravel(DATA[i]['decision']) for i in range(0, N_of_file)]
).astype(int)


### Intensities matrix: contain values from all the data
NTot_Data = N_of_file*Ncell_classified
# Stack the per-dataset matrices row-wise in one call instead of growing the
# matrix with np.append inside a loop (which re-copies everything each time).
Intensity = np.concatenate(
    [np.zeros((0, DATA[0]['intensity'].shape[1]))]
    + [DATA[i]['intensity'] for i in range(0, N_of_file)],
    axis=0)

# Row j of Intensity must belong to Classes[j]. If a decision list holds more
# (or fewer) entries than there are stored profiles, every later step would
# silently pair profiles with the wrong label, so fail loudly here instead.
if len(Classes) != len(Intensity):
    raise ValueError(
        "Number of class labels (%d) differs from number of intensity profiles (%d)"
        % (len(Classes), len(Intensity)))

    
### Drop every intensity profile that contains NaN, together with its class ###
# np.where returns (row indices, column indices) of the NaN entries; only the
# row indices are needed to decide which profiles to discard.
nan_indices = np.where(np.isnan(Intensity))
rows_with_nan = nan_indices[0]
Intensity = np.delete(Intensity, rows_with_nan, axis=0)
Classes = np.delete(Classes, rows_with_nan, axis=0)
# Number of profiles left after the clean-up
total = Intensity.shape[0]


### Downsampling ###
# Keep the first 896 samples of every profile and then take every 7th one
# (896 / 7 = 128 points per profile when at least 896 samples are available).
step = 7
data1 = Intensity[:, :896]
data_downsampl = data1[:, 0::step]
# Length of one downsampled intensity profile
Int_profile_len = data_downsampl[0].size


### Shuffle the profiles (and their classes) with a reproducible permutation ###
# A fixed seed makes the row order -- and therefore the train/test split taken
# from it further down -- identical on every run. With an unseeded shuffle the
# split changes at each execution and the reported accuracy cannot be reproduced.
# NOTE(review): the weights loaded later from "weights.best.hdf5" were trained
# on some earlier split; the held-out rows are only truly unseen if training
# used this same ordering -- confirm against the training notebook.
SPLIT_SEED = 0
shuffle_rng = np.random.RandomState(SPLIT_SEED)
# Random ordering of the row indices 0 .. total-1, each used exactly once
randseq = shuffle_rng.permutation(total)
#Data randomly organized (profiles and labels are reordered with the same indices)
data_downsampl = data_downsampl[randseq,:]
Classes = Classes[randseq]

######################### load file - training data ####################
N_TRAIN = 800   # number of leading (shuffled) profiles used for training
N_CLASSES = 6   # must equal the number of units of the network's softmax layer

train_dt = data_downsampl[0:N_TRAIN,:]
train_cl = Classes[0:N_TRAIN]
X = train_dt
y = train_cl
# Fewer than N_TRAIN rows are returned when the data set is smaller than that
N_train_cell = len(train_dt)

# Process the data to fit in a keras CNN properly.
# Each profile becomes a (Int_profile_len, 1, 1) block, so X has shape
# (N, Int_profile_len, 1, 1):
# N - number of samples
# Int_profile_len x 1 - sample size
# last axis - a single channel
# NOTE(review): this reading assumes the channels-last layout, the default for
# the TensorFlow backend -- confirm image_data_format in keras.json.
X = X.reshape((N_train_cell, Int_profile_len, 1, 1))

# One-hot encode the labels with a fixed width. Without num_classes the width
# is inferred from the largest label present in this slice, which can differ
# from the number of outputs of the network if a class happens to be missing.
# Labels are expected to lie in 0 .. N_CLASSES-1; a larger label raises an error.
y = np_utils.to_categorical(y, num_classes=N_CLASSES)


######################### load file - testing data ####################
# Every profile that follows the training slice is used for testing.
N_test_cell = total - N_train_cell
held_out = slice(N_train_cell, total)
# Independent float copy of the held-out intensity profiles, one row per cell
test_dt = np.array(data_downsampl[held_out], dtype=np.float64, order='C')
# Classes associated with the held-out profiles
test_cl = Classes[held_out]

In [3]:
# How to load and use weights from a checkpoint

# Define the CNN whose weights are restored afterwards.
# See http://keras.io for API reference.

# Convolutional part: (number of filters, number of stacked convolutions) per
# stage; every stage is closed by a max-pooling layer with pool size (2, 1).
conv_stages = [(64, 2), (128, 3), (256, 3)]

cnn = Sequential()
is_first_layer = True
for filters, depth in conv_stages:
    for _ in range(depth):
        # padding="same" => the output keeps the size of the input.
        # ReLU => thresholding at zero: negative values become 0, positive
        # values are left unchanged.
        layer_options = dict(activation="relu", padding="same")
        if is_first_layer:
            # Only the very first layer declares the shape of one input sample
            layer_options["input_shape"] = (Int_profile_len, 1, 1)
            is_first_layer = False
        cnn.add(Convolution2D(filters, (3, 1), **layer_options))
    cnn.add(MaxPooling2D(pool_size=(2, 1)))

# Classifier part: flatten, one hidden dense layer with dropout, and a
# 6-unit softmax output.
cnn.add(Flatten())
cnn.add(Dense(1024, activation="relu"))
cnn.add(Dropout(0.5))
cnn.add(Dense(6, activation="softmax"))

In [4]:
# Restore the trained weights from the checkpoint file; the architecture built
# above has to match the one the checkpoint was saved from.
cnn.load_weights("weights.best.hdf5")

# Compile the model with the optimizer settings, loss and metric given below.
adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
cnn.compile(optimizer=adam, loss="categorical_crossentropy", metrics=['accuracy'])
print("Created model and loaded weights from file")

Created model and loaded weights from file


In [13]:
# Estimate the accuracy (in percent) on the held-out test profiles using the
# loaded weights: the predicted class is the index of the largest network output.
out = cnn.predict(test_dt.reshape((N_test_cell, Int_profile_len, 1, 1)))
predicted_cl = np.argmax(out, axis=1)
n_correct = float(np.sum(predicted_cl == test_cl))
print(n_correct / float(test_cl.shape[0]) * 100.0)

82.63157894736842
