In [1]:
import numpy as np
from scipy.misc import imresize
from skimage.color import gray2rgb
from mfeat import io
import json
from keras.models import Model
from keras import backend as K
from keras.applications.vgg16 import VGG16, preprocess_input

cnn = VGG16(include_top=False, weights='imagenet')
layer_id = {layer.name: idx for idx, layer in enumerate(cnn.layers)}


Using TensorFlow backend.


In [2]:
layer_id

{'block1_conv1': 1,
 'block1_conv2': 2,
 'block1_pool': 3,
 'block2_conv1': 4,
 'block2_conv2': 5,
 'block2_pool': 6,
 'block3_conv1': 7,
 'block3_conv2': 8,
 'block3_conv3': 9,
 'block3_pool': 10,
 'block4_conv1': 11,
 'block4_conv2': 12,
 'block4_conv3': 13,
 'block4_pool': 14,
 'block5_conv1': 15,
 'block5_conv2': 16,
 'block5_conv3': 17,
 'block5_pool': 18,
 'input_1': 0}

In [3]:
l1=cnn.layers[1]
l1.get_config()

{'activation': 'relu',
 'activity_regularizer': None,
 'bias_constraint': None,
 'bias_initializer': {'class_name': 'Zeros', 'config': {}},
 'bias_regularizer': None,
 'data_format': 'channels_last',
 'dilation_rate': (1, 1),
 'filters': 64,
 'kernel_constraint': None,
 'kernel_initializer': {'class_name': 'VarianceScaling',
  'config': {'distribution': 'uniform',
   'mode': 'fan_avg',
   'scale': 1.0,
   'seed': None}},
 'kernel_regularizer': None,
 'kernel_size': (3, 3),
 'name': 'block1_conv1',
 'padding': 'same',
 'strides': (1, 1),
 'trainable': True,
 'use_bias': True}

In [4]:
i=1
[l.shape for l in cnn.layers[i].get_weights()]

[(3, 3, 3, 64), (64,)]

In [5]:
import bokeh
from bokeh.plotting import figure, show, output_notebook, output_file

from IPython.display import Image, HTML, Javascript
from coffeetools import coffee
output_notebook()

In [61]:
ww = cnn.get_weights()[0]
ww_im=deprocess_image(ww[:,:,:,0].reshape(-1,3,3))
ww_im

array([[[161, 149, 122],
        [170, 154, 122],
        [165, 151, 123]],

       [[156, 130, 106],
        [162, 130, 101],
        [159, 131, 105]],

       [[122,  98,  99],
        [121,  91,  89],
        [122,  95,  94]]], dtype=uint8)

In [63]:
ww[:,:,:,0]

array([[[ 0.63240981,  0.66972482,  0.648009  ],
        [ 0.61512595,  0.63568246,  0.62595248],
        [ 0.48092937,  0.47474867,  0.47976112]],

       [[ 0.58466601,  0.60656857,  0.59568602],
        [ 0.51180404,  0.51240629,  0.51535988],
        [ 0.38653371,  0.35990655,  0.37537396]],

       [[ 0.48213053,  0.48177043,  0.48416632],
        [ 0.41893074,  0.39781493,  0.41183931],
        [ 0.3918193 ,  0.3501761 ,  0.37070364]]], dtype=float32)

In [68]:
i=0
layer_i = cnn.get_weights()[i]
filters = [deprocess_image(layer_i[:,:,:,j].reshape(-1,3,3)) for j in range(layer_i.shape[3])]
xc = np.meshgrid(range(8),range(8))[0].ravel().tolist()
yc = np.meshgrid(range(8),range(8))[1].ravel().tolist()

In [70]:
p=figure()
p.image_rgba(filters,xc,yc,1,1)
output_notebook()
show(p)

In [None]:
def filter_plot(layer, x_label='X', y_label='Y', plot_width=800, plot_height=600):
    p=figure(responsive=False, x_axis_label=x_label, y_axis_label=y_label, width=plot_width, height=plot_height)
    
    x_range = df[x_col].max()-df[x_col].min()
    y_range = df[y_col].max()-df[y_col].min()

    cds = bokeh.models.ColumnDataSource(
        df
    )
    # cds.data['volume_fraction'] = [ v*5 for v in cds.data['volume_fraction'] ]

    circles = p.circle(
        source=cds,
        x=x_col,
        y=y_col,
        fill_color='color',
        size='DotSize',
        fill_alpha=.6
    )

    p.add_tools(
        bokeh.models.HoverTool(
            tooltips=[
                ('S full', '@Sfull'),
                ('Decay Length (nm)','@CorrLen'),
                ('Image Size (um)','@ImageSize'),
                ('Process','@Process')
            ]
        )
    )

    images = p.image_url(
        source=cds,
        x=x_col,
        y=y_col,
        w=x_range*0.02,
        h=y_range*0.02*plot_width/plot_height,
        url='file_url',
        anchor='center',
        global_alpha=.001
    )

    # mu = [.5]
    # std = np.sqrt(-1*np.diff(mu)**2/(2*np.log(.2)))

    p.x_range.callback = bokeh.models.CustomJS( args= {
            'x_axis': p.x_range,
            'y_axis': p.y_range,
            'images': images.glyph,
            'cds': cds,
            'points': p.renderers[5].glyph
        }, 
        code=coffee.compile("""
        ddata = Bokeh._.max(cds.get('data')[points.get('x').field]) - Bokeh._.min(cds.get('data')[points.get('x').field])
        dx = x_axis.get('end') - x_axis.get('start')
        if dx/ddata > .0001
            images.set 'global_alpha', .001 + (.85)*Math.exp( -1*Math.pow(dx/ddata-.0001,2)/.014)
        """,bare=True)
    )
    
    # patches = []
    # for c in df['class'].unique():
    #     i = df.loc[df['class']==c].index.values
    #     h = ConvexHull(df.iloc[i][['pca_x','pca_y']])
    #     pp = p.patch(**{
    #             'fill_color': df.iloc[i[0]].color,
    #             'fill_alpha': .4,
    #             'x': df.iloc[i].pca_x.values[h.vertices],
    #             'y': df.iloc[i].pca_y.values[h.vertices],
    #             'legend': c,
    #         })

    # gds=bokeh.models.ColumnDataSource(g)
    # class_scale = p.oval( 
    #     source=gds, 
    #     x='pca_x.mean', 
    #     width='pca_x.std',
    #     y='pca_y.mean',
    #     height='pca_y.std',
    #     fill_color='color.min',
    #     fill_alpha = .6,
    # )

    # p.add_tools(
    #     bokeh.models.TapTool( 
    #         renderers=[pp],
    #         callback=bokeh.models.CustomJS(
    #                 code=coffee.compile("""
    #                 console.log 'click'
    #                 """,bare=True)
    #             )
    #     )
    # )
    return p

In [50]:
def deprocess_image(x):
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + 1e-5)
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255
    x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x

In [13]:
ww[0][:,:,:,0]

array([[[ 0.42947057,  0.55037946,  0.4800154 ],
        [ 0.373467  ,  0.44007453,  0.4085474 ],
        [-0.06136011, -0.08138704, -0.06514555]],

       [[ 0.27476987,  0.34573907,  0.31047726],
        [ 0.03868078,  0.04063221,  0.05020237],
        [-0.36722335, -0.45350131, -0.40338343]],

       [[-0.05746817, -0.05863491, -0.05087169],
        [-0.26224968, -0.33066967, -0.28522751],
        [-0.35009676, -0.4850302 , -0.41851634]]], dtype=float32)

In [None]:
model = Model(input=cnn.input, output=cnn.get_layer('block4_conv3').output)

In [None]:
def image_tensor(image):
    """ replicate a grayscale image onto the three channels of an RGB image
        and reshape into a tensor appropriate for keras
    """
    image3d = gray2rgb(image).astype(np.float32)
    x = 255*image3d
    x = np.expand_dims(x, axis=0)
    return preprocess_input(x)

In [None]:
micrographs_json = './data/full/micrographs.json'
# obtain a dataset
with open(micrographs_json, 'r') as f:
    micrograph_dataset = json.load(f)

# work with sorted micrograph keys...
keys = sorted(micrograph_dataset.keys())
micrographs = [micrograph_dataset[key] for key in keys]
micrographs = [io.load_image(m, barheight=38) for m in micrographs]


In [None]:
import pandas as pd
afm_csv = '/Users/Imperssonator/CC/uhcs/data/afm3000/afm3000.csv'
df_mg = pd.read_csv(afm_csv)
df_mg = df_mg.set_index('id')

In [None]:
keys = df_mg['id'].tolist()
micrographs = [io.load_image(file, barheight=0) for file in df_mg['imPath'].tolist()]

In [None]:
import matplotlib.pyplot as plt
%matplotlib notebook
%config InlineBackend.figure_format = 'retina'

im_test = io.load_image(df_mg.loc[72].imPath, barheight=0)
plt.imshow(im_test,cmap='gray')

In [None]:
import h5py

def load_representations(datafile):
    # grab image representations from hdf5 file
    keys, features = [], []

    with h5py.File(datafile, 'r') as f:
        for key in f:
            keys.append(key)
            features.append(f[key][...])

    return np.array(keys), np.array(features)

In [None]:
from sklearn.externals import joblib
dict_file='data/afm3000/dictionary/ssift-kmeans-100.pkl'
dictionary = joblib.load(dict_file)

In [None]:
dictionary.cluster_centers_.shape

In [None]:
from mfeat import local
sift_result = local.sparse_sift(im_test)
sift_result.shape

In [None]:
from mfeat import encode
vlad_feats = encode.vlad(sift_result,dictionary)

In [None]:
vlad_feats.shape

In [None]:
datafile='data/afm3000/features/ssift-vlad-100.h5'
vlad_keys, vlad_feats = load_representations(datafile)
vlad_ids = np.array([int(k) for k in vlad_keys])

In [None]:
%matplotlib inline
test_feat=sift_feats[np.where(sift_ids==72)][0,:]
x = np.array([i for i in range(test_feat.shape[0])])
p1=plt.bar(x,test_feat)
plt.show()

In [None]:
cnn_feat = model.predict(image_tensor(im_test))

In [None]:
cnn_feat.shape

In [None]:
from sklearn.externals import joblib
cnn_dict_file='data/afm3000/dictionary/vgg16_block4_conv3-kmeans-32.pkl'
cnn_dict = joblib.load(cnn_dict_file)

In [None]:
cnn_dict.cluster_centers_.shape

In [None]:
keys = df_mg['id'].tolist()
keys = [str(k) for k in keys]
micrographs = [io.load_image(file, barheight=0) for file in df_mg['imPath'].tolist()]

# set up paths
dictionary_file = '{dir}/dictionary/{method}-kmeans-{n_clusters}.pkl'.format(**metadata)
featurefile = '{dir}/features/{method}-{encoding}-{n_clusters}.h5'.format(**metadata)

In [None]:
datafile='data/afm3000/features/vgg16_block4_conv3-vlad-32.h5'
cnn_keys, cnn_feats = load_representations(datafile)

In [None]:
cnn_feats.shape

In [None]:
features.shape

In [None]:
    ids = pd.Series([int(s) for s in keys])
    df_mg = pd.read_csv('/Users/Imperssonator/CC/uhcs/data/afm3000/afm3000.csv')
    which_ids = np.where(ids.apply(lambda x: x in df_mg.id.tolist()))[0]
    keys_reduced = keys[which_ids]
    ids_reduced = pd.Series([int(s) for s in keys_reduced])
    features_reduced = features[which_ids,:]
    df_mg = df_mg.set_index('id')
    labels = np.array(df_mg['noise'].loc[ids_reduced.tolist()])

In [None]:
np.unique(labels)

In [None]:
np.where(df_mg['noise'].loc[ids_reduced.tolist()].apply(lambda x: type(x)!=str))

In [None]:
df_mg.iloc[1046]

In [None]:
labeltypes=[type(l) for l in labels]
for i,l in enumerate(labeltypes):
    if l != str:
        print(i)

In [None]:
keys[np.array(ids.apply(lambda x: x in df_mg.id.tolist()))]

In [None]:
features[np.array(ids.apply(lambda x: x in df_mg.id.tolist())),:]

In [None]:
ids[ids.apply(lambda x: x in df_mg.id.tolist())]

In [None]:

df_mg = pd.read_csv('/Users/Imperssonator/CC/uhcs/data/afm3000/afm3000.csv')
df_mg = df_mg.set_index('id')
df_mg

In [None]:
ids

In [None]:
labels = np.array(df_mg['fiber'].loc[ids.tolist()])
np.unique(labels)

In [None]:
df_mg.id

In [None]:
sl=[m.shape for m in micrographs]


In [None]:
out.shape

In [None]:
range(out.shape[0])

In [None]:
np.random.choice(range(out.shape[0]), size=0.1, replace=False)

In [None]:
out.shape

In [None]:
out_reshape = out.reshape((-1, out.shape[-1])) # to [feature, channels]
out_reshape.shape

In [None]:
def tensor_to_features(X, subsample=None):
    """ convert feature map tensor to numpy data matrix {nsamples, nchannels} """
    
    # transpose array so that map dimensions are on the last axis
#     features = X.transpose(0,2,3,1) # to [batch, height, width, channels]
    features = X.reshape((-1, X.shape[-1])) # to [feature, channels]

#     if subsample >= 1.0 or subsample <= 0:
#         subsample = None

    if subsample is not None:
        choice = np.sort(
            np.random.choice(range(features.shape[0]), size=subsample, replace=False)
        )
        features = features[choice]
        
    return features

In [None]:
features=tensor_to_features(out)

In [None]:
features.shape

In [None]:
features[features==0].shape

In [None]:
4096*512

In [None]:
import h5py

In [None]:
import glob

In [None]:
datafile=glob.glob('/Users/Imperssonator/CC/uhcs/data/full/features/*')

In [None]:
datafile=datafile[0]

In [None]:
h5f=h5py.File(datafile[0],'r')

In [None]:
def load_representations(datafile):
    # grab image representations from hdf5 file
    keys, features = [], []

    with h5py.File(datafile, 'r') as f:
        for key in f:
            keys.append(key)
            features.append(f[key][...])

    return np.array(keys), np.array(features)

In [None]:
keys, feats = load_representations(file[0])

In [None]:
feats.shape

In [None]:
keys.shape

In [None]:
keys, features = load_representations(datafile)

labels = []
for key in keys:
    if '-' in key:
        # deal with cropped micrographs: key -> Micrograph.id-UL
        m_id, quadrant = key.split('-')
    else:
        m_id = key
    m = db.query(Micrograph).filter(Micrograph.micrograph_id == int(m_id)).one()
    labels.append(m.primary_microconstituent)
labels = np.array(labels)

# simplify: get primary microconstituent; throw out martensite
primary_label = np.array([label.split('+')[0] for label in labels])
k = np.array(keys)[primary_label != 'martensite']
l = primary_label[primary_label != 'martensite']
X = features[primary_label != 'martensite']

l, X, sel = select_balanced_dataset(l, X, n_per_class=n_per_class, seed=seed)

cv = StratifiedKFold(n_splits=10, shuffle=True)
# cv = StratifiedShuffleSplit(n_splits=10, test_size=0.1)


In [None]:
alist = [1, 2, 3, 4, 5]
alist[[1, 3]]

In [None]:
np.unique(labels)

In [None]:
np.unique(list(labels))

In [None]:
import os
featuresfile = '/Users/Imperssonator/CC/uhcs/data/afm3000/tsne/ssift-vlad-100.h5'
path_list = os.path.normpath(featuresfile).split(os.sep)
dataset_dir = os.path.join(*list(path_list[:-2]))

In [None]:
os.path.join(path_list[:-2])