In [1]:
import numpy as np
import pickle 
import matplotlib.pyplot as plt
from scipy import *
import scipy.misc
import os
import h5py

import keras
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras import backend as K
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD, adam
from keras.models import Sequential
from keras.losses import categorical_crossentropy
from keras.utils import np_utils

from skimage.feature import hog
from skimage import data, exposure
from skimage.transform import rescale, resize

from sklearn.utils import shuffle
#import astropy.visualization as vis

  (fname, cnt))
  (fname, cnt))
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Directory the notebook was started from. The HDF5 paths below are relative,
# so they are resolved against this directory. (`root_dir` itself is not used
# by any other cell visible in this notebook.)
root_dir = os.getcwd()

# Load everything into memory inside `with` blocks so that both HDF5 handles
# are closed as soon as the arrays have been read. Previously the files stayed
# open for the lifetime of the kernel and `labels_raw` was a lazy h5py dataset
# that only worked while its file was still open.
# NOTE(review): the name `data` shadows `from skimage import data` from the
# import cell; it is kept here only so existing references keep resolving.
with h5py.File(r'data.h5', 'r') as data:
    images_raw = np.asarray(data['images'])
    labels_raw = np.asarray(data['labels'])

with h5py.File(r'random_1000.h5', 'r') as random_radio:
    images_radio = np.asarray(random_radio['images'])

In [3]:
print(images_radio.shape)

(1000, 300, 300)


In [4]:
# Drop unusable cut-outs: every image whose pixel mean is exactly zero or NaN.
# The mean is taken per image with np.mean, exactly as before, because the
# hard-coded FR1_ind / FR2_ind tables index into the filtered result.
image_means = np.array([np.mean(cutout) for cutout in images_radio])
zero_ind = np.flatnonzero((image_means == 0.) | np.isnan(image_means)).tolist()
images_radio_clean = np.delete(images_radio, zero_ind, 0)

In [5]:
images_radio_clean.shape

(890, 300, 300)

In [6]:
# Visually Inspecting Images
#stretch = vis.AsinhStretch(1e-2) + vis.MinMaxInterval()

#imgs = np.reshape(images_radio_clean, (-1, images_radio_clean.shape[1], images_radio_clean.shape[2]))
#imgs.shape

"""for n in range(0, imgs.shape[0]):
    fig = plt.figure(figsize=(6, 6))
    
    plt.subplot(1, 2, 2)
    ax1 = fig.add_subplot(1,2,1)
    plt.imshow(imgs[n,:,:], cmap='viridis', shape=(300, 300))
    ax1 = fig.add_subplot(1,2,2)
    plt.imshow(stretch(imgs[n,:,:]), cmap='viridis', shape=(300, 300))
    plt.show()
    print ("image with index  ", n, "  with mean ", np.mean(imgs[n,:,:]))
    if np.isnan(np.mean(imgs[n,:,:])) ==True:
        print ("you've got nans!")
        
"""


'for n in range(0, imgs.shape[0]):\n    fig = plt.figure(figsize=(6, 6))\n    \n    plt.subplot(1, 2, 2)\n    ax1 = fig.add_subplot(1,2,1)\n    plt.imshow(imgs[n,:,:], cmap=\'viridis\', shape=(300, 300))\n    ax1 = fig.add_subplot(1,2,2)\n    plt.imshow(stretch(imgs[n,:,:]), cmap=\'viridis\', shape=(300, 300))\n    plt.show()\n    print ("image with index  ", n, "  with mean ", np.mean(imgs[n,:,:]))\n    if np.isnan(np.mean(imgs[n,:,:])) ==True:\n        print ("you\'ve got nans!")\n        \n'

In [7]:
# Row indices into `images_radio_clean` (the filtered random radio cut-outs).
# `formatting_classes` deletes these rows from the negative pools and appends
# the FR2 rows to the FR2 positives.
# NOTE(review): presumably chosen by visual inspection; the indices are only
# valid for the exact input file and zero/NaN filter used above -- re-derive
# them if either changes.
FR1_ind = np.array([46, 77, 104, 587])
FR2_ind = np.array([3, 23, 34, 41, 52, 54, 63, 65, 70, 89, 101, 114, 139, 144, 191, 205, 229, 232, 238, 256, 267, 269, 279, 281, 286, 313, 329, 345, 364, 375, 385, 393, 421, 427, 430, 432, 437, 451, 455, 457, 458, 473, 492, 495, 502, 518, 521, 525, 526, 533, 545, 552, 554, 555, 556, 559, 562, 573, 593, 622, 627, 641, 668, 672, 700, 702, 709, 717, 718, 725, 767, 860, 880])

In [8]:
np.asarray(images_radio_clean)[FR2_ind].shape

(73, 300, 300)

In [9]:
# Defining rescaling function
def rescale_imgs(images, size=(256, 256)):
    """Resize every image of a stack to one common 2-D shape.

    Parameters
    ----------
    images : array-like
        Image stack indexed as ``images[i, :, :]`` (one 2-D image per row).
    size : tuple of int, optional
        Target ``(rows, cols)`` passed to ``skimage.transform.resize``.
        Defaults to ``(256, 256)``, the size that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_images, size[0], size[1])`` holding the
        resized images in their original order.
    """
    images = np.asarray(images)
    rows, cols = size

    # Pre-allocate the output; np.zeros defaults to float64.
    img = np.zeros((images.shape[0], rows, cols))

    for i in range(images.shape[0]):
        img[i, :, :] = resize(images[i, :, :], (rows, cols))

    return img

In [10]:
images = rescale_imgs(images_raw)
neg = rescale_imgs(images_radio_clean)

  warn("The default mode, 'constant', will be changed to 'reflect' in "


In [11]:
# Assemble the positive / negative image pools for the FR1 and FR2 classifiers
def formatting_classes(pos_images, pos_labels, neg_images, fr1_ind=None, fr2_ind=None):
    """Build shuffled positive and negative pools for the two classifiers.

    Parameters
    ----------
    pos_images : numpy.ndarray
        Labelled image stack, one image per row along axis 0.
    pos_labels : array-like
        One label per row of ``pos_images``. Rows equal to ``False``/0 form
        class 1 (FR1) and rows equal to ``True``/1 form class 2 (FR2).
        Assumed to be 1-D.
    neg_images : numpy.ndarray
        Unlabelled stack used as negatives; must have the same per-image
        shape as ``pos_images`` so the stacks can be concatenated.
    fr1_ind, fr2_ind : array-like of int, optional
        Row indices into ``neg_images`` that look like FR1 / FR2 sources.
        When omitted, the notebook-level ``FR1_ind`` / ``FR2_ind`` are used.

    Returns
    -------
    images1 : numpy.ndarray
        Class-1 positives, shuffled.
    images2_stack : numpy.ndarray
        Class-2 positives plus ``neg_images[fr2_ind]``, shuffled.
    neg1 : numpy.ndarray
        Negatives for the FR1 classifier: ``neg_images`` without ``fr1_ind``
        plus all class-2 positives, shuffled.
    neg2 : numpy.ndarray
        Negatives for the FR2 classifier: ``neg_images`` without ``fr2_ind``
        plus all class-1 positives, shuffled.

    Notes
    -----
    Only the FR2 look-alikes are added to a positive pool; the FR1 look-alikes
    are removed from ``neg1`` but not appended to ``images1``.
    NOTE(review): confirm this asymmetry is intentional.
    """
    # Fall back to the notebook globals so existing three-argument calls work.
    if fr1_ind is None:
        fr1_ind = FR1_ind
    if fr2_ind is None:
        fr2_ind = FR2_ind

    # Split the positive data into FR1's and FR2's with vectorised masks.
    labels = np.asarray(pos_labels)
    class1 = np.flatnonzero(labels == 0)
    class2 = np.flatnonzero(labels == 1)

    images1 = pos_images[class1, :, :]
    images2 = pos_images[class2, :, :]

    # Remove samples that look very much like FR1's / FR2's from the negatives.
    neg1 = np.delete(neg_images, fr1_ind, 0)  # negative data for FR1 classifier
    neg2 = np.delete(neg_images, fr2_ind, 0)  # negative data for FR2 classifier

    # Add the removed FR2 look-alikes to the FR2 positive set.
    images2_stack = np.vstack((images2, neg_images[fr2_ind]))

    # Each classifier also treats the opposite labelled class as negative.
    neg1 = np.vstack((neg1, images2))
    neg2 = np.vstack((neg2, images1))

    # Shuffle every pool with a fixed seed so the later head-slices are stable.
    images1 = shuffle(images1, random_state=0)
    images2_stack = shuffle(images2_stack, random_state=0)
    neg1 = shuffle(neg1, random_state=0)
    neg2 = shuffle(neg2, random_state=0)

    return images1, images2_stack, neg1, neg2

In [12]:
images1, images2, neg1, neg2  = formatting_classes(np.asarray(images), np.asarray(labels_raw), neg)

In [13]:
neg1.shape

(1009, 256, 256)

In [14]:
# Keep only the first 190 positives and the first 1004 negatives of each pool
# (the pools were already shuffled inside `formatting_classes`).
# NOTE(review): 190 and 1004 are magic numbers -- presumably picked to give
# both classifiers equally sized pools; confirm and promote to named constants.
images1_cut = images1[:190]
images2_cut = images2[:190]
neg1_cut = neg1[:1004]
neg2_cut = neg2[:1004]

In [15]:
def format_data(images, ratio = 0.5):
    """Cut a stack in two along its first axis.

    The first ``round(ratio * n)`` rows (``n`` = number of rows) become the
    training part and the remaining rows the test part. Nothing is shuffled
    here, so row order is preserved in both halves.

    Returns
    -------
    (train, test) : tuple of array slices of ``images``
    """
    n_rows = images.shape[0]
    cut = round(ratio * n_rows)

    return images[:cut, :], images[cut:, :]

# Split every pool with the default ratio of 0.5 (first half -> train,
# second half -> test). Only the FR2 splits (`*_pos2`, `*_neg2`) are consumed
# by the later cells visible in this notebook.
train_pos1, test_pos1  = format_data(images1_cut)
train_pos2, test_pos2  = format_data(images2_cut)
train_neg1, test_neg1  = format_data(neg1_cut)
train_neg2, test_neg2  = format_data(neg2_cut)

In [16]:
train_neg2.shape

(502, 256, 256)

In [17]:
#imgs = np.reshape(images_shrunk, (-1, images_shrunk.shape[1], images_shrunk.shape[2]))

#plt.figure(figsize=(12, 12))
#for n in range(0, 16):
    #plt.subplot(4, 4, 1 + n)
    #plt.imshow(stretch(imgs[-n-1,:,:]), cmap='viridis', shape=(135, 135))
    #plt.axis('off')
    #plt.title(labels[n])

In [18]:
#stretch = vis.AsinhStretch(1e-1) + vis.MinMaxInterval()

#images_s = np.zeros((images_shrunk.shape[0], images_shrunk.shape[1], images_shrunk.shape[2]))

#for i, images in enumerate(images_shrunk):
    #images_s[i, :, :] = stretch(images_shrunk[i, :, :])

In [19]:
def data_gen(images, size=18000, pos=True, batch_size=20, seed=None, verbose=True):
    """Augment an image stack with random Keras transformations.

    The stack is passed through an ``ImageDataGenerator`` a whole number of
    times (``data_mult`` passes); each pass contributes one randomly
    transformed copy of every input image.

    Parameters
    ----------
    images : numpy.ndarray
        Stack indexed as (n_images, height, width).
    size : int, optional
        Target size of the combined positive + negative set. This call
        produces at most ``size / 2`` samples, rounded down to a whole
        multiple of ``n_images``.
    pos : bool, optional
        True  -> label 1, augmentation includes width/height shifts.
        False -> label 0, augmentation without shifts.
    batch_size : int, optional
        Batch size requested from the Keras iterator (default 20, the value
        that used to be hard-coded).
    seed : int or None, optional
        Forwarded to ``ImageDataGenerator.flow``. None (default) keeps the
        previous unseeded behaviour.
    verbose : bool, optional
        Print the same progress information as before (default True).

    Returns
    -------
    pro_images : numpy.ndarray
        Shape (n_images * data_mult, height, width, 1).
    pro_labels : numpy.ndarray
        Shape (n_images * data_mult,), filled with 1 or 0.

    Raises
    ------
    ValueError
        If ``images`` contains no samples.
    """
    # `size` covers both classes; each call generates one class, i.e. half.
    size = size/2

    samples = np.asarray(images).shape[0]
    if samples == 0:
        raise ValueError("data_gen needs at least one image")

    # Keras expects a trailing channel axis.
    x = images.reshape((-1, images.shape[1], images.shape[2], 1))

    # Augmentation shared by both classes.
    augment_args = dict(
        rotation_range=180,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode="nearest")

    if pos:
        label = 1
        # Only positives are additionally shifted.
        augment_args.update(width_shift_range=0.15, height_shift_range=0.15)
    else:
        label = 0

    y = np.full(x.shape[0], label)
    datagen = ImageDataGenerator(**augment_args)

    # .flow() yields batches of randomly transformed images indefinitely;
    # nothing is written to disk.
    gen = datagen.flow(x, y, batch_size=batch_size, seed=seed)

    # Number of complete passes over the input that fit into the target size.
    # np.floor is used explicitly instead of relying on `from scipy import *`.
    data_mult = int(np.floor(size/x.shape[0]))

    if verbose:
        print (data_mult)
        print (size/x.shape[0])
        print (x.shape[0])

    # NOTE: float64 buffers; for large stacks this is several GB.
    pro_images = np.zeros((x.shape[0]*data_mult, images.shape[1], images.shape[2], 1))
    pro_labels = np.zeros((y.shape[0]*data_mult))

    if verbose:
        print (pro_images.shape)

    for e in range(1, data_mult+1):
        start = samples*(e-1)   # first output row of this pass
        filled = 0              # rows of this pass written so far

        for X_batch, Y_batch in gen:
            # Use the real batch length. The old `shape[0] % batch_size`
            # remainder was 0 whenever `samples` was a multiple of
            # `batch_size`, which made the final assignment fail.
            n = min(X_batch.shape[0], samples - filled)
            lo = start + filled
            pro_images[lo:lo+n, :, :, :] = X_batch[:n]
            pro_labels[lo:lo+n] = Y_batch[:n]
            filled += n

            if filled >= samples:
                break

        if verbose:
            print(e)

    return pro_images, pro_labels

In [20]:
train_pos_x, train_pos_y  = data_gen(train_pos2, pos=True)
train_neg_x, train_neg_y = data_gen(train_neg2, pos=False)

94
94.73684210526316
95
(8930, 256, 256, 1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
17
17.92828685258964
502
(8534, 256, 256, 1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17


In [21]:
def balance_classes(x1, x2, y1, y2):
    """Trim two classes to the sample count of the smaller one.

    ``x1`` / ``x2`` are 4-D sample arrays and ``y1`` / ``y2`` their label
    vectors. All four are cut to the first ``min(len(x1), len(x2))`` entries
    and returned in the order they were given.
    """
    keep = min(x1.shape[0], x2.shape[0])

    print ("slice")
    return x1[:keep, :, :, :], x2[:keep, :, :, :], y1[:keep], y2[:keep]

In [22]:
def shuffle_classes(x1, x2, y1, y2):
    """Concatenate two classes and shuffle samples and labels together.

    The sample arrays are stacked along axis 0, the label vectors are joined
    in the same order, and both are permuted with one fixed-seed shuffle
    (``random_state=0``) so each sample keeps its label.

    Returns
    -------
    (x, y) : shuffled samples and matching labels
    """
    print ("stack")
    features = np.vstack((x1, x2))
    targets = np.concatenate((y1, y2))

    print ('start')
    shuffled = shuffle(features, targets, random_state = 0)
    print ('stop')
    return shuffled[0], shuffled[1]
    

In [23]:
train_pos_x, train_neg_x, train_pos_y, train_neg_y = balance_classes(train_pos_x, train_neg_x, train_pos_y, train_neg_y)

slice


In [24]:
train_pos_x.shape

(8534, 256, 256, 1)

In [25]:
pro_train_x, pro_train_y = shuffle_classes(train_pos_x, train_neg_x, train_pos_y, train_neg_y)

stack
start
stop


In [26]:
# Drop the references to the per-class training arrays so their memory can be
# reclaimed; the combined set is held in pro_train_x / pro_train_y.
train_pos_x = train_pos_y = train_neg_x = train_neg_y = None

In [27]:
# Drop the trailing channel axis and add very small Gaussian jitter.
# The noise shape is taken from the array itself instead of a hard-coded
# 256x256, so this cell keeps working if the rescale size changes.
# NOTE(review): 10e-10 equals 1e-9 -- confirm that is the intended amplitude.
# NOTE(review): np.random is not seeded here, so the jitter differs per run.
nb_train = pro_train_x.shape[0]
pro_train_x = np.reshape(pro_train_x, pro_train_x.shape[:3])
pro_train_x = pro_train_x + (10e-10)*np.random.randn(*pro_train_x.shape)

In [None]:
# Persist the training images with the newest pickle protocol available.
with open('train_x_fr2.dat', 'wb') as f:
    pickle.dump(pro_train_x, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
pro_train_x = None

In [None]:
test_neg_x, test_neg_y  = data_gen(test_neg2, pos=False)
test_pos_x, test_pos_y = data_gen(test_pos2, pos=True)

In [None]:
test_pos_x, test_neg_x, test_pos_y, test_neg_y = balance_classes(test_pos_x, test_neg_x, test_pos_y, test_neg_y)

In [None]:
pro_test_x, pro_test_y = shuffle_classes(test_pos_x, test_neg_x, test_pos_y, test_neg_y)

In [None]:
# Drop the references to the per-class test arrays so their memory can be
# reclaimed; the combined set is held in pro_test_x / pro_test_y.
test_pos_x = test_pos_y = test_neg_x = test_neg_y = None

In [None]:
# Drop the trailing channel axis and add very small Gaussian jitter.
# The noise shape is taken from the array itself instead of a hard-coded
# 256x256, so this cell keeps working if the rescale size changes.
# NOTE(review): 10e-10 equals 1e-9 -- confirm that is the intended amplitude.
# NOTE(review): np.random is not seeded here, so the jitter differs per run.
nb_test = pro_test_x.shape[0]
pro_test_x = np.reshape(pro_test_x, pro_test_x.shape[:3])
pro_test_x = pro_test_x + (10e-10)*np.random.randn(*pro_test_x.shape)

In [None]:
# Persist the test images with the newest pickle protocol available.
with open('test_x_fr2.dat', 'wb') as f:
    pickle.dump(pro_test_x, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
pro_test_x = None

In [None]:
# Persist the label vectors next to the image files, training labels first.
with open('train_y_fr2.dat', 'wb') as f:
    pickle.dump(pro_train_y, f, protocol=pickle.HIGHEST_PROTOCOL)

with open('test_y_fr2.dat', 'wb') as f:
    pickle.dump(pro_test_y, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
print ("Done")