In [1]:
import tensorflow as tf
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import pickle
import keras
import random
from keras.layers.core import Dense, Activation, Dropout
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation, CuDNNGRU, Conv1D, CuDNNLSTM, Flatten
from keras.layers import AveragePooling1D, MaxPooling1D, Bidirectional, GlobalMaxPool1D, Concatenate, GlobalAveragePooling1D, GlobalMaxPooling1D,concatenate
from keras.layers import SpatialDropout1D
from keras.models import Sequential
from keras.callbacks import Callback
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from keras.initializers import glorot_uniform
from keras.layers import Input, Add, Dense, Activation, ZeroPadding1D, BatchNormalization, Flatten, Conv1D, AveragePooling1D, MaxPooling1D, GlobalMaxPooling1D
from keras.models import Model, load_model

Using TensorFlow backend.


In [2]:
import os
import warnings
from keras import backend as K
warnings.filterwarnings('ignore')

# NOTE: PYTHONHASHSEED is read once, when the interpreter starts. Assigning it
# here is inherited by child processes but does not change hash randomization
# in the already-running kernel; export it before launching Jupyter if hash
# ordering matters.
os.environ['PYTHONHASHSEED'] = '0'

# Seed the Python and NumPy global generators so code that draws from them is
# repeatable after a kernel restart.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

In [4]:
names = np.load('name.npy')
# Class index -> class name lookup: the position of a name in name.npy is its
# class index, which is how predicted indices are decoded further down.
mapping = dict(enumerate(names))

In [5]:
# Feature array; the training cell reshapes it to (7254, 2251, 1) before fitting.
X = np.load('new_data.npy')

In [6]:
# Class labels; cast to int after shuffling and one-hot encoded over 186 classes later on.
y = np.load('labels.npy')

In [7]:
# Shuffle features and labels together: append the labels as a last column,
# shuffle the rows, then split the columns apart again. A dedicated seeded
# generator keeps the resulting row order identical across kernel restarts.
Z = np.c_[X, y]
np.random.RandomState(0).shuffle(Z)
X = Z[:, :-1]
# The labels shared Z's dtype while stacked, so restore them to integers.
y = Z[:, -1].astype(int)

In [8]:
def identity_block(X, f, filters, stage, block):
    """
    Build a 1-D residual identity block: three Conv1D + BatchNormalization
    steps whose result is added to the unmodified input.

    Arguments:
        X - input tensor; it is added to the output of the last main-path
            step, so its shape must match that output (in particular its
            channel count must equal filters[2])
        f - integer, kernel size of the middle Conv1D of the main path
        filters - sequence of three integers, the filter counts of the three
            main-path Conv1D layers
        stage - combined with `block` to build the layer names
            ("res<stage><block>_branch2a", "bn<stage><block>_branch2a", ...)
        block - string, combined with `stage` to build the layer names

    Returns:
        the tensor produced by the final ReLU over (main path + shortcut)
    """
    # Shared tail of every layer name in this block.
    name_tail = str(stage) + block + "_branch"

    F1, F2, F3 = filters

    # Main-path steps in order: (name suffix, filters, kernel size, padding, apply ReLU).
    # The last step has no ReLU of its own; activation happens after the addition.
    main_path = (
        ("2a", F1, 1, "valid", True),
        ("2b", F2, f, "same", True),
        ("2c", F3, 1, "valid", False),
    )

    tensor = X
    for suffix, n_filters, kernel, pad, with_relu in main_path:
        tensor = Conv1D(filters=n_filters, kernel_size=kernel, strides=1, padding=pad,
                        name="res" + name_tail + suffix,
                        kernel_initializer=glorot_uniform(seed=0))(tensor)
        tensor = BatchNormalization(name="bn" + name_tail + suffix)(tensor)
        if with_relu:
            tensor = Activation("relu")(tensor)

    # Add the untouched input (the shortcut) to the main path, then activate.
    tensor = Add()([tensor, X])
    tensor = Activation("relu")(tensor)

    return tensor

def convolutional_block(X, f, filters, stage, block, s=2):
    """
    Build a 1-D residual block whose shortcut is itself a Conv1D +
    BatchNormalization projection, so the block may change the channel count
    and (through the stride) the sequence length.

    Arguments:
        X - input tensor
        f - integer, kernel size of the middle Conv1D of the main path
        filters - sequence of three integers, the filter counts of the three
            main-path Conv1D layers; the shortcut projection uses filters[2]
        stage - combined with `block` to build the layer names
        block - string, combined with `stage` to build the layer names
        s - stride of the first main-path Conv1D and of the shortcut Conv1D

    Returns:
        the tensor produced by the final ReLU over (main path + shortcut)
    """
    F1, F2, F3 = filters
    name_tail = str(stage) + block + "_branch"

    def conv_bn(tensor, suffix, n_filters, kernel, stride, pad):
        # One Conv1D followed by BatchNormalization, named after this block.
        tensor = Conv1D(filters=n_filters, kernel_size=kernel, strides=stride, padding=pad,
                        name="res" + name_tail + suffix,
                        kernel_initializer=glorot_uniform(seed=0))(tensor)
        tensor = BatchNormalization(name="bn" + name_tail + suffix)(tensor)
        return tensor

    # Main path: 1-wide conv (strided) -> f-wide conv -> 1-wide conv.
    main = conv_bn(X, "2a", F1, 1, s, "valid")
    main = Activation("relu")(main)

    main = conv_bn(main, "2b", F2, f, 1, "same")
    main = Activation("relu")(main)

    # No ReLU here: activation is applied after the shortcut is added.
    main = conv_bn(main, "2c", F3, 1, 1, "valid")

    # Shortcut: project the input with the same stride and F3 filters so it can be added.
    shortcut = conv_bn(X, "1", F3, 1, s, "valid")

    merged = Add()([main, shortcut])
    merged = Activation("relu")(merged)

    return merged

# Construct ResNet50
def ResNet50(input_shape=(2251,1),classes=186):
    """
    Assemble the 1-D ResNet50-style classifier used in this notebook.

    Layout: zero padding -> 7-wide stride-2 Conv1D / BatchNormalization /
    ReLU / max pooling -> four residual stages (each one convolutional block
    followed by 2, 3, 5 and 2 identity blocks respectively) -> average
    pooling -> Flatten -> softmax Dense.

    Arguments:
        input_shape - shape of a single sample, passed to Input
        classes - number of units of the softmax output layer

    Returns:
        an uncompiled keras Model named "ResNet50"
    """
    X_input = Input(input_shape)

    # Stem (stage 1): zero padding of 3, then conv / batch norm / ReLU / max pooling.
    X = ZeroPadding1D(3)(X_input)
    X = Conv1D(filters=64, kernel_size=7, strides=2, name="conv1",
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = BatchNormalization(name="bn_conv1")(X)
    X = Activation("relu")(X)
    X = MaxPooling1D(pool_size=3, strides=2)(X)

    # Residual stages 2-5: (stage number, filter counts, stride of the leading
    # convolutional block, labels of the identity blocks that follow it).
    stage_plan = (
        (2, [64, 64, 256], 1, "bc"),
        (3, [128, 128, 512], 2, "bcd"),
        (4, [256, 256, 1024], 2, "bcdef"),
        (5, [512, 512, 2048], 2, "bc"),
    )
    for stage, stage_filters, stride, identity_labels in stage_plan:
        X = convolutional_block(X, f=3, filters=stage_filters, stage=stage, block="a", s=stride)
        for label in identity_labels:
            X = identity_block(X, f=3, filters=stage_filters, stage=stage, block=label)

    # Head: average pooling, flatten, softmax classifier.
    X = AveragePooling1D(pool_size=2, padding="same")(X)
    X = Flatten()(X)
    X = Dense(classes, activation="softmax", name="fc" + str(classes),
              kernel_initializer=glorot_uniform(seed=0))(X)

    return Model(inputs=X_input, outputs=X, name="ResNet50")


In [9]:
# Build the network for samples of shape (2251, 1) and 186 output classes.
cnn_model = ResNet50(input_shape=(2251,1),classes=186)
# categorical_crossentropy is paired with the one-hot targets passed to fit() below.
cnn_model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

Instructions for updating:
Colocations handled automatically by placer.


In [10]:
def convert_to_one_hot(y, C):
    """
    One-hot encode integer class labels.

    Arguments:
        y - array-like of integer class indices in [0, C); any shape, it is
            flattened before encoding
        C - number of classes (width of the encoding)

    Returns:
        float ndarray of shape (number of labels, C) with a single 1.0 per row

    Raises:
        IndexError - if any label lies outside [0, C). Negative labels are
            rejected explicitly because NumPy indexing would otherwise wrap
            them around to the last classes without complaint.
    """
    # asarray lets plain lists/tuples through as well as ndarrays.
    labels = np.asarray(y).reshape(-1)
    if labels.size == 0:
        # Nothing to encode; keep the (0, C) result shape.
        return np.eye(C)[:0]
    if labels.min() < 0 or labels.max() >= C:
        raise IndexError("labels must lie in [0, %d)" % C)
    return np.eye(C)[labels]

# One-hot encode the labels over 186 classes, the same class count the model is built with.
one_hot_y = convert_to_one_hot(y,186)

In [11]:
one_hot_y.shape

(7254, 186)

In [None]:
# Fitting CNN model
# Add the trailing channel axis the Conv1D input expects. -1 lets the sample
# count follow the loaded data instead of being pinned to 7254 rows.
cnn_model.fit(X.reshape(-1, 2251, 1), one_hot_y, batch_size = 64, epochs = 100, verbose = 1)


Instructions for updating:
Use tf.cast instead.
Epoch 1/100

In [12]:
# Save model
# NOTE(review): the file name says "Lenet" although the model built in this
# notebook is named "ResNet50"; the load cell reads this exact path, so rename
# both together if at all.
cnn_model.save("Lenet_v1.h5")

In [5]:
# Reload the trained model from disk. load_model is already imported in the
# first cell; the import is repeated here, presumably so this cell can be run
# in a fresh kernel without retraining — confirm.
from keras.models import load_model
 
cnn_model= load_model('Lenet_v1.h5')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


In [14]:
# Load the test array from mix_test.npy; the recorded output shows shape (14, 2251).
Test=np.load('mix_test.npy')
Test.shape

(14, 2251)

In [15]:
# The test array is fed to the model exactly as loaded; no scaler is applied.
X_test = Test
# -1 infers the number of test samples instead of hard-coding 14.
pred = cnn_model.predict(X_test.reshape(-1, 2251, 1))
print(pred.shape)
# Index of the highest-scoring class for every sample.
out = np.argmax(pred, axis=1)
# Translate class indices back to class names.
predicted = np.vectorize(mapping.get)(out)
print(predicted)

(14, 186)
['ZIF-65' 'ZIF-65' 'XAYMUF' 'ZIF-65' 'AENCESE10' 'ZIF-7' 'ZIF-75' 'ZIF-75'
 'ZIF-75' 'ZIF-12' 'ZIF-75' 'AFEHUO' 'ZIF-75' 'ZIF-5']


In [16]:
# For every sample, collect the names of the 13 highest-scoring classes, best first.
preds = [
    [mapping[i] for i in np.argsort(p)[::-1][0:13]]
    for p in pred
]
for p in preds:
  print(p)

['ZIF-65', 'ZIF-64', 'SAJRAW10', 'BAHGUN04', 'ENCDBZ01', 'ZIF-12', 'CIDNIM', 'ETCUCY10', 'WIWQOI', 'ZIF-14', 'JOSNAG', 'ZIF-5', 'DUT-12']
['ZIF-65', 'ZIF-64', 'ENCDBZ01', 'SAJRAW10', 'ETCUCY10', 'REGJIW02', 'BAHGUN04', 'ZIF-12', 'ZIF-5', 'MINVUA01', 'CIDNIM', 'WABVIF', 'ZIF-14']
['XAYMUF', 'ZIF-2', 'DUT-10', 'MINVUA01', 'DUT-12', 'JOKXOW', 'PUTYAE', 'GORQAF', 'EDIKEH', 'LATCIS', 'JOKYOX10', 'MIHBAG', 'IDIWOH05']
['ZIF-65', 'ZIF-64', 'ENCDBZ01', 'ZIF-5', 'ETCUCY10', 'SAJRAW10', 'REGJIW02', 'BAHGUN04', 'MINVUA01', 'WABVIF', 'ZIF-12', 'Eu', 'QEFNAQ']
['AENCESE10', 'ZIF-75', 'MIHBAG', 'ZIF-7', 'TENQOS', 'ZIF-9', 'RAFRUL', 'WABWAY', 'KEGZOL01', 'ZIF-90', 'TILWAM', 'QAYTOZ', 'MIMVEJ']
['ZIF-7', 'ZIF-75', 'ZIF-9', 'KEGZOL01', 'AENCESE10', 'WABWAY', 'MIHBAG', 'ZIF-90', 'KEXFIC10', 'WILRIS', 'TILWAM', 'ZIF-72', 'QAYTOZ']
['ZIF-75', 'ZIF-7', 'ZIF-9', 'AENCESE10', 'ZIF-90', 'KEGZOL01', 'WABWAY', 'MIHBAG', 'JOKXUC', 'WILRIS', 'UGUTOF', 'ZIF-12', 'ZIWTII']
['ZIF-75', 'CIDNIM', 'ZIF-65', 'ZIF-12', '

In [0]:
# Load and test on the experimental data
# NOTE(review): `ls` is not defined anywhere in the visible notebook; it
# presumably held a directory prefix in an earlier session — confirm before running.
# NOTE(review): pickle.load can execute arbitrary code from the file; only open
# pickles from a trusted source.
with open(ls+'test__None_.pickle', 'rb') as f:
    Test = pickle.load(f)

# Preview the first rows of the loaded table.
Test.head()

Unnamed: 0,grid_0.0,grid_0.001,grid_0.002,grid_0.003,grid_0.004,grid_0.005,grid_0.006,grid_0.007,grid_0.008,grid_0.009000000000000001,...,grid_0.992,grid_0.993,grid_0.994,grid_0.995,grid_0.996,grid_0.997,grid_0.998,grid_0.999,grid_1.0,label
0,0.033514,0.020624,0.027069,0.02333,0.0,0.046661,0.039185,0.05388,0.034029,0.057747,...,0.034544,0.065352,0.048982,0.055169,0.043825,0.048208,0.050271,0.037638,0.054138,ZIF-67
1,0.03476,0.042744,0.025364,0.007046,0.096761,0.00282,0.030059,0.021604,0.025364,0.033349,...,0.568339,0.564113,0.563644,0.537812,0.566462,0.549084,0.53828,0.569748,0.62095,ZIF-67
2,0.032539,0.04602,0.042301,0.044392,0.039976,0.062521,0.064148,0.049273,0.067402,0.05764,...,0.038581,0.042765,0.022313,0.022429,0.041836,0.031144,0.02882,0.036722,0.043695,ZIF-67
3,0.029431,0.080294,0.040947,0.06142,0.055022,0.084453,0.068458,0.102047,0.06398,0.064299,...,0.066539,0.047345,0.050544,0.03103,0.039667,0.059501,0.039667,0.045106,0.047345,ZIF-67
4,0.038178,0.038881,0.037756,0.039795,0.033115,0.037334,0.038178,0.034873,0.033397,0.039303,...,0.013148,0.009843,0.012304,0.013007,0.015116,0.010617,0.010898,0.008156,0.011742,ZIF-8


In [0]:
Test.shape

(14, 1002)

In [0]:
# NOTE(review): `scaler` is never created in the visible notebook (the only
# MinMaxScaler() line is commented out), and this cell reshapes to 1001 steps
# while the model built above uses input_shape=(2251, 1). The cell looks like a
# leftover from an earlier 1001-feature version — confirm before re-running.
# All columns except the last one (the label column in the preview above) are used as features.
X_test = scaler.transform(Test.iloc[:,:-1])
pred = cnn_model.predict(X_test.reshape(14, 1001, 1))
# Highest-scoring class index per sample, then mapped back to class names.
out = [np.argmax(p) for p in pred]
out = np.array(out)
predicted = np.vectorize(mapping.get)(out)
print(predicted)

['ZIF-65' 'AFIXES' 'ZIF-65' 'ZIF-65' 'ZIF-65' 'ZIF-65' 'ZIF-65' 'ZIF-65'
 'ZIF-12' 'ZIF-65' 'ZIF-90' 'ZIF-90' 'ZIF-12' 'ZIF-12']


In [0]:
# For every sample, collect the names of the 3 highest-scoring classes, best first.
preds = [
    [mapping[i] for i in np.argsort(p)[::-1][0:3]]
    for p in pred
]

In [0]:
# Print one top-3 list of class names per test sample.
for p in preds:
  print(p)

['ZIF-65', 'ZIF-90', 'ZIF-8']
['AFIXES', 'GORQAF', 'DUT-23']
['ZIF-65', 'ZIF-90', 'TENQOS']
['ZIF-65', 'ZIF-90', 'ZIF-8']
['ZIF-65', 'ZIF-90', 'TENQOS']
['ZIF-65', 'ZIF-90', 'LITHUR']
['ZIF-65', 'ZIF-90', 'TENQOS']
['ZIF-65', 'ZIF-90', 'ZIF-8']
['ZIF-12', 'ZIF-71', 'ZIF-90']
['ZIF-65', 'ZIF-90', 'LITHUR']
['ZIF-90', 'ZIF-65', 'TENQOS']
['ZIF-90', 'ZIF-65', 'TENQOS']
['ZIF-12', 'ZIF-71', 'ZIF-90']
['ZIF-12', 'ZIF-71', 'ZIF-90']


In [0]:
Test.label

0     ZIF-67
1     ZIF-67
2     ZIF-67
3     ZIF-67
4      ZIF-8
5      ZIF-8
6      ZIF-8
7     ZIF-90
8     ZIF-71
9      ZIF-8
10    ZIF-90
11    ZIF-90
12    ZIF-71
13    ZIF-71
Name: label, dtype: object