# 加载环境

In [1]:
from Bio import SeqIO
from Bio.Alphabet import IUPAC
from Bio.SeqUtils import GC
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame

import keras
from keras.utils import np_utils
from keras.layers import Input, TimeDistributed, Flatten, Dense, Dropout
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF


Using TensorFlow backend.


# 超参数

In [2]:
# Hyperparameters shared by the cells below.
SEED = 727           # seed applied to NumPy's global random generator (see below)
LENGTH = 1000        # size of the first non-batch axis the inputs are reshaped to
EPOCHES = 100        # number of epochs passed to model.fit
BATCH_SIZE = 300     # mini-batch size passed to model.fit
HIDDEN_UNITS = 1024  # width of the fully connected hidden layer

# SEED used to be declared but never applied; seed NumPy's global generator so
# that anything drawing from it is repeatable across "Restart & Run All".
# NOTE(review): TensorFlow's own seed is not set here -- confirm whether the
# backend in use needs a separate call for fully reproducible training.
np.random.seed(SEED)

# 读入数据

In [3]:
# Load the training and test archives and reverse the axes of every sample.
#
# Fixes relative to the previous version of this cell:
#   * the two append(...) lines were missing their closing parenthesis;
#   * `test = []` overwrote the freshly loaded test archive, so the later
#     `test[...]` lookups could not work;
#   * indexing the archive inside the loop (`train['X_train'][i]`) re-reads the
#     whole array from disk on every iteration; each array is now read once;
#   * the per-sample Python list plus np.array(...) copy is replaced by a single
#     vectorised transpose, which needs far less peak memory.
def _transpose_samples(samples):
    """Return a C-contiguous array with the axes of every sample reversed.

    Axis 0 (the sample axis) stays first; all remaining axes are reversed,
    which is what calling np.transpose on each sample individually produces.
    """
    samples = np.asarray(samples)
    reversed_axes = (0,) + tuple(range(samples.ndim - 1, 0, -1))
    return np.ascontiguousarray(samples.transpose(reversed_axes))


train = np.load("train.npz")
test = np.load("test.npz")

X_train = _transpose_samples(train['X_train'])
Y_train = train['y_train']

# The labels of the test archive are stored under 'y_test', so the samples are
# presumably under 'X_test'; fall back to the key the original cell used.
# NOTE(review): confirm the actual key names inside test.npz.
test_x_key = 'X_test' if 'X_test' in test.files else 'X_train'
X_test = _transpose_samples(test[test_x_key])
Y_test = test['y_test']

MemoryError: 

In [None]:
# Report how many samples and labels each split contains.
for _caption, _values in (
    ("number of data for train: ", X_train),
    ("number of data for test: ", X_test),
    ("number of label for train: ", Y_train),
    ("number of label for test: ", Y_test),
):
    print(_caption + str(len(_values)))

# 格式整理

In [None]:
# Give every sample the shape (LENGTH, 20, 1) expected by the Conv2D input
# layer (the trailing axis is the single channel) and cast to float32.
X_train4D = np.reshape(X_train, (X_train.shape[0], LENGTH, 20, 1)).astype('float32')
X_test4D = np.reshape(X_test, (X_test.shape[0], LENGTH, 20, 1)).astype('float32')

In [None]:
# One-hot encode the integer class labels.
# to_categorical infers the number of classes from the largest label it sees,
# so encoding each split on its own can yield different widths when one split
# lacks the highest class. Use one shared class count for both splits.
num_classes = int(max(np.max(Y_train), np.max(Y_test))) + 1
Y_train_One_Hot = np_utils.to_categorical(Y_train, num_classes)
Y_test_One_Hot = np_utils.to_categorical(Y_test, num_classes)

# 垃圾回收

In [None]:
# Release the pre-reshape arrays and the integer training labels; Y_test is
# kept because the evaluation cells further down still read it.
del X_train, X_test, Y_train

# 建立模型

In [None]:
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten,Conv2D,MaxPooling2D

In [None]:
# Start from an empty Sequential container; layers are appended cell by cell below.
model = Sequential()

## 卷积层1 (20×LENGTH 16层)

In [None]:
# First layer: 16 filters of size 5x5 with 'same' padding and ReLU activation,
# applied to single-channel inputs of shape (LENGTH, 20, 1).
model.add(
    Conv2D(
        16,
        (5, 5),
        padding='same',
        activation='relu',
        input_shape=(LENGTH, 20, 1),
    )
)

## 平坦层 (LENGTH×20×16个神经元)

In [None]:
# Flatten the convolution output into one vector per sample for the dense layers.
model.add(Flatten())

## 隐藏层 (HIDDEN_UNITS个神经元)

In [None]:
# Fully connected hidden layer with HIDDEN_UNITS ReLU units.
model.add(Dense(units=HIDDEN_UNITS, activation='relu'))

In [None]:
# Drop half of the hidden activations during training.
model.add(Dropout(rate=0.5))

## 输出层 (softmax，神经元数 = 类别数)

In [None]:
# Softmax output layer with one unit per class. The unit count is taken from
# the width of the one-hot training labels instead of a hard-coded 10, so the
# layer cannot disagree with the targets passed to model.fit.
model.add(Dense(Y_train_One_Hot.shape[1], activation='softmax'))

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

# 训练模型

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss for the
# one-hot targets, and accuracy reported as an additional metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Fit the model, holding out 20% of the training data for validation; the
# returned history object is read by the plotting helpers.
train_history = model.fit(
    X_train4D,
    Y_train_One_Hot,
    batch_size=BATCH_SIZE,
    epochs=EPOCHES,
    verbose=2,
    validation_split=0.2,
)

In [None]:
def _plot_train_history(train_key, validation_key, y_label, history=None):
    """Plot a training curve and its validation curve on a y-axis fixed to 0-1.

    Args:
        train_key: key in ``history.history`` holding the training series.
        validation_key: key in ``history.history`` holding the validation series.
        y_label: text used for the y-axis label.
        history: object exposing a ``history`` dict (such as the value returned
            by ``model.fit``). When None, the notebook-level ``train_history``
            variable is used.

    Raises:
        KeyError: if either key is missing from ``history.history``.
    """
    if history is None:
        # Backward-compatible fallback: read the global set by the training cell.
        history = train_history
    for series_key in (train_key, validation_key):
        plt.plot(history.history[series_key])
    # NOTE(review): the y-axis is fixed to [0, 1]; values above 1 fall outside
    # the visible area -- confirm this is intended for the loss plot.
    plt.ylim((0, 1))
    plt.title('Train History')
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()


def show_train_history_acc(train_acc, test_acc, history=None):
    """Plot training accuracy against validation accuracy per epoch.

    Args:
        train_acc: history key of the training accuracy series.
        test_acc: history key of the validation accuracy series.
        history: optional history object; defaults to the global ``train_history``.
    """
    _plot_train_history(train_acc, test_acc, 'Accuracy', history)


def show_train_history_loss(train_loss, test_loss, history=None):
    """Plot training loss against validation loss per epoch.

    Args:
        train_loss: history key of the training loss series.
        test_loss: history key of the validation loss series.
        history: optional history object; defaults to the global ``train_history``.
    """
    _plot_train_history(train_loss, test_loss, 'Loss', history)

In [None]:
# Accuracy curves: training ('acc') against validation ('val_acc').
show_train_history_acc(train_acc='acc', test_acc='val_acc')

In [None]:
# Loss curves: training ('loss') against validation ('val_loss').
show_train_history_loss(train_loss='loss', test_loss='val_loss')

# 评估模型的准确率

In [None]:
# Evaluate on the held-out test set. With the metrics configured in
# model.compile, index 0 of the result is the loss and index 1 the accuracy.
scores = model.evaluate(x=X_test4D, y=Y_test_One_Hot)
scores[1]

In [None]:
# Predicted class index for every test sample.
prediction = model.predict_classes(x=X_test4D)

In [None]:
import pandas as pd
# Confusion table: true labels as rows, predicted classes as columns.
pd.crosstab(
    index=Y_test,
    columns=prediction,
    rownames=['label'],
    colnames=['predict'],
)

In [None]:
# Raw softmax outputs (one row per test sample, one column per output unit).
predicted_Probaility = model.predict(x=X_test4D)

In [None]:
# Show the predicted probabilities of the first 50 test samples as a table.
pd.DataFrame(data=predicted_Probaility[:50])

In [None]:
# True label of the first test sample, for comparison with its predicted probabilities.
Y_test[0]