In [None]:
import numpy as np
from scipy.misc import imresize
from skimage.color import gray2rgb
from mfeat import io
import json
from keras.models import Model
from keras import backend as K
from keras.applications.vgg16 import VGG16, preprocess_input

# VGG16 with ImageNet weights, built with include_top=False.
cnn = VGG16(weights='imagenet', include_top=False)
# Look-up table: layer name -> position of that layer in cnn.layers.
layer_id = dict((layer.name, position) for position, layer in enumerate(cnn.layers))


In [None]:
layer_id

In [None]:
ww = cnn.get_weights()
[b.shape for b in ww]

In [None]:
# Sub-network that maps the VGG16 input to the output of 'block4_conv3'.
# The tensors are passed positionally because the keyword names changed from
# `input`/`output` (Keras 1) to `inputs`/`outputs` (Keras 2); the positional
# form is accepted by both signatures.
model = Model(cnn.input, cnn.get_layer('block4_conv3').output)

In [None]:
def image_tensor(image):
    """ turn a grayscale image into a batched, keras-preprocessed RGB tensor

        The image is copied onto three channels with gray2rgb, cast to float32,
        multiplied by 255, given a leading axis of length one, and passed
        through preprocess_input.
    """
    rgb = gray2rgb(image).astype(np.float32)
    batch = (255*rgb)[np.newaxis, ...]
    return preprocess_input(batch)

In [None]:
micrographs_json = './data/full/micrographs.json'
# read the dataset description (a JSON mapping) from disk
with open(micrographs_json, 'r') as f:
    micrograph_dataset = json.load(f)

# iterate in sorted-key order so that keys[i] pairs with micrographs[i]
keys = sorted(micrograph_dataset.keys())
micrographs = [io.load_image(micrograph_dataset[key], barheight=38) for key in keys]


In [None]:
import pandas as pd
afm_csv = '/Users/Imperssonator/CC/uhcs/data/afm3000/afm3000.csv'
# load the table at afm_csv and use its 'id' column as the row index
df_mg = pd.read_csv(afm_csv).set_index('id')

In [None]:
# 'id' was turned into the index of df_mg by set_index('id') in the previous
# cell, so it is no longer a column and df_mg['id'] raises KeyError.
# Read the ids from the index instead.
keys = df_mg.index.tolist()
micrographs = [io.load_image(path, barheight=0) for path in df_mg['imPath'].tolist()]

In [None]:
import matplotlib.pyplot as plt
%matplotlib notebook
%config InlineBackend.figure_format = 'retina'

im_test = io.load_image(df_mg.loc[72].imPath, barheight=0)
plt.imshow(im_test,cmap='gray')

In [None]:
import h5py

def load_representations(datafile):
    """Read every top-level entry of an HDF5 file into memory.

    Returns a pair of numpy arrays: the entry names, and the data stored
    under each name, in the file's iteration order.
    """
    with h5py.File(datafile, 'r') as h5:
        names = list(h5)
        arrays = [h5[name][...] for name in names]

    return np.array(names), np.array(arrays)

In [None]:
from sklearn.externals import joblib
dict_file='data/afm3000/dictionary/ssift-kmeans-100.pkl'
dictionary = joblib.load(dict_file)

In [None]:
dictionary.cluster_centers_.shape

In [None]:
from mfeat import local
sift_result = local.sparse_sift(im_test)
sift_result.shape

In [None]:
from mfeat import encode
vlad_feats = encode.vlad(sift_result,dictionary)

In [None]:
vlad_feats.shape

In [None]:
datafile = 'data/afm3000/features/ssift-vlad-100.h5'
vlad_keys, vlad_feats = load_representations(datafile)
# integer version of every key, in the same order as vlad_feats
vlad_ids = np.array(list(map(int, vlad_keys)))

In [None]:
%matplotlib inline
# The features loaded from ssift-vlad-100.h5 are bound to vlad_feats / vlad_ids;
# sift_feats / sift_ids are never defined in this notebook.
# Pick the feature vector stored under id 72 (the id used for im_test).
test_feat = vlad_feats[vlad_ids == 72][0, :]
x = np.arange(test_feat.shape[0])
p1 = plt.bar(x, test_feat)
plt.show()

In [None]:
cnn_feat = model.predict(image_tensor(im_test))

In [None]:
cnn_feat.shape

In [None]:
from sklearn.externals import joblib
cnn_dict_file='data/afm3000/dictionary/vgg16_block4_conv3-kmeans-32.pkl'
cnn_dict = joblib.load(cnn_dict_file)

In [None]:
cnn_dict.cluster_centers_.shape

In [None]:
# 'id' is the index of df_mg (set_index('id')), not a column, so df_mg['id']
# raises KeyError; take the ids from the index and stringify them.
keys = [str(k) for k in df_mg.index.tolist()]
micrographs = [io.load_image(file, barheight=0) for file in df_mg['imPath'].tolist()]

# `metadata` is not defined anywhere in this notebook. These values reproduce
# the literal dictionary / feature paths used in the neighbouring cells
# (data/afm3000/.../vgg16_block4_conv3-...-32).
# NOTE(review): confirm these are the intended settings.
metadata = dict(dir='data/afm3000', method='vgg16_block4_conv3',
                encoding='vlad', n_clusters=32)

# set up paths
dictionary_file = '{dir}/dictionary/{method}-kmeans-{n_clusters}.pkl'.format(**metadata)
featurefile = '{dir}/features/{method}-{encoding}-{n_clusters}.h5'.format(**metadata)

In [None]:
datafile='data/afm3000/features/vgg16_block4_conv3-vlad-32.h5'
cnn_keys, cnn_feats = load_representations(datafile)

In [None]:
cnn_feats.shape

In [None]:
features.shape

In [None]:
    # keys may be a plain Python list, which cannot be indexed with an array of
    # positions; work on an array view of it.
    keys_arr = np.asarray(keys)
    ids = pd.Series([int(s) for s in keys_arr])
    df_mg = pd.read_csv('/Users/Imperssonator/CC/uhcs/data/afm3000/afm3000.csv')
    # positions of the ids that also occur in the table's 'id' column
    # (vectorised; the lambda version rebuilt df_mg.id.tolist() per element)
    which_ids = np.flatnonzero(ids.isin(df_mg['id']).values)
    keys_reduced = keys_arr[which_ids]
    ids_reduced = pd.Series([int(s) for s in keys_reduced])
    features_reduced = features[which_ids,:]
    df_mg = df_mg.set_index('id')
    # 'noise' value for every retained id, in the order of ids_reduced
    labels = np.array(df_mg['noise'].loc[ids_reduced.tolist()])

In [None]:
np.unique(labels)

In [None]:
np.where(df_mg['noise'].loc[ids_reduced.tolist()].apply(lambda x: type(x)!=str))

In [None]:
df_mg.iloc[1046]

In [None]:
# print the position of every label whose exact type is not str
labeltypes = list(map(type, labels))
for position, label_type in enumerate(labeltypes):
    if label_type is not str:
        print(position)

In [None]:
# df_mg is indexed by 'id' at this point, so the ids live in df_mg.index
# (df_mg.id raises AttributeError). keys is wrapped in an array so a boolean
# mask can index it even when it is a plain list.
np.asarray(keys)[ids.isin(df_mg.index).values]

In [None]:
# df_mg is indexed by 'id' here, so test membership against df_mg.index
# (df_mg.id raises AttributeError once 'id' is the index).
features[ids.isin(df_mg.index).values, :]

In [None]:
# df_mg is indexed by 'id' here, so test membership against df_mg.index
# (df_mg.id raises AttributeError once 'id' is the index).
ids[ids.isin(df_mg.index)]

In [None]:

df_mg = pd.read_csv('/Users/Imperssonator/CC/uhcs/data/afm3000/afm3000.csv')
df_mg = df_mg.set_index('id')
df_mg

In [None]:
ids

In [None]:
labels = np.array(df_mg['fiber'].loc[ids.tolist()])
np.unique(labels)

In [None]:
# 'id' is the index of df_mg after set_index('id'); attribute access only
# reaches columns, so show the index instead of df_mg.id.
df_mg.index

In [None]:
sl=[m.shape for m in micrographs]


In [None]:
out.shape

In [None]:
range(out.shape[0])

In [None]:
# `size` must be an integer number of samples, not a fraction:
# draw 10% of the entries along axis 0 (at least one), without replacement.
n_keep = max(1, int(0.1 * out.shape[0]))
np.random.choice(out.shape[0], size=n_keep, replace=False)

In [None]:
out.shape

In [None]:
out_reshape = out.reshape((-1, out.shape[-1])) # to [feature, channels]
out_reshape.shape

In [None]:
def tensor_to_features(X, subsample=None):
    """ convert feature map tensor to numpy data matrix {nsamples, nchannels}

    All axes of X except the last are flattened into rows; the last axis is
    kept as the columns.

    Parameters
    ----------
    X : array whose last axis holds the channels
    subsample : None, int or float, optional
        None keeps every row. An int is the number of rows to keep. A float
        strictly between 0 and 1 is the fraction of rows to keep (at least
        one row); any other float keeps every row.

    Returns
    -------
    2-D array with X.shape[-1] columns. When subsampling, rows are drawn
    without replacement with np.random.choice and returned in their original
    order.
    """
    features = X.reshape((-1, X.shape[-1])) # to [feature, channels]

    if isinstance(subsample, float):
        if 0.0 < subsample < 1.0:
            # np.random.choice needs an integer size: turn the fraction into a count
            subsample = max(1, int(subsample * features.shape[0]))
        else:
            # out-of-range fraction: no subsampling
            subsample = None

    if subsample is not None:
        choice = np.sort(
            np.random.choice(features.shape[0], size=subsample, replace=False)
        )
        features = features[choice]

    return features

In [None]:
features=tensor_to_features(out)

In [None]:
features.shape

In [None]:
features[features==0].shape

In [None]:
4096*512

In [None]:
import h5py

In [None]:
import glob

In [None]:
datafile=glob.glob('/Users/Imperssonator/CC/uhcs/data/full/features/*')

In [None]:
datafile=datafile[0]

In [None]:
# datafile is already a single path string (the first glob match was selected
# in the previous cell); indexing it again would pass only its first character.
h5f = h5py.File(datafile, 'r')

In [None]:
def load_representations(datafile):
    """Read every top-level entry of an HDF5 file into memory.

    Returns a pair of numpy arrays: the entry names, and the data stored
    under each name, in the file's iteration order.

    NOTE(review): this redefines a function of the same name that appears
    earlier in this notebook.
    """
    with h5py.File(datafile, 'r') as h5:
        names = list(h5)
        arrays = [h5[name][...] for name in names]

    return np.array(names), np.array(arrays)

In [None]:
# `file` is never assigned in this notebook; the feature file path selected
# above is held in `datafile`.
keys, feats = load_representations(datafile)

In [None]:
feats.shape

In [None]:
keys.shape

In [None]:
from sklearn.model_selection import StratifiedKFold

keys, features = load_representations(datafile)

# NOTE(review): db, Micrograph, select_balanced_dataset, n_per_class and seed
# are not defined anywhere in the visible notebook; they must be provided
# before this cell can run.
labels = []
for key in keys:
    # deal with cropped micrographs: key -> Micrograph.id-UL
    # everything before the first '-' is the micrograph id; a key without
    # '-' is the id itself
    m_id = key.split('-', 1)[0]
    m = db.query(Micrograph).filter(Micrograph.micrograph_id == int(m_id)).one()
    labels.append(m.primary_microconstituent)
labels = np.array(labels)

# simplify: get primary microconstituent; throw out martensite
primary_label = np.array([label.split('+')[0] for label in labels])
keep = primary_label != 'martensite'  # computed once, reused for k, l and X
k = np.array(keys)[keep]
l = primary_label[keep]
X = features[keep]

l, X, sel = select_balanced_dataset(l, X, n_per_class=n_per_class, seed=seed)

# seed the shuffling so the folds are reproducible across runs
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
# cv = StratifiedShuffleSplit(n_splits=10, test_size=0.1)


In [None]:
alist = [1, 2, 3, 4, 5]
# A plain list cannot be indexed with a list of positions (TypeError);
# convert it to an array to select several positions at once.
picked = np.asarray(alist)[[1, 3]]
picked

In [None]:
np.unique(labels)

In [None]:
np.unique(list(labels))

In [None]:
import os
featuresfile = '/Users/Imperssonator/CC/uhcs/data/afm3000/tsne/ssift-vlad-100.h5'
path_list = os.path.normpath(featuresfile).split(os.sep)
# Go up two levels from the file (file -> containing dir -> dataset dir).
# Re-joining path_list with os.path.join would drop the leading separator of
# an absolute path, because the first split component is the empty string.
dataset_dir = os.path.dirname(os.path.dirname(os.path.normpath(featuresfile)))

In [None]:
# os.path.join takes path components as separate arguments, not a list
# (passing the list raises TypeError). Joining with os.sep also keeps the
# leading separator when the first component is the empty string.
os.sep.join(path_list[:-2])