In [46]:
import numpy as np # 导入NumPy
import pandas as pd # 导入Pandas
# Load the training and test sets from CSV (paths are relative to the notebook).
df_train = pd.read_csv('../input/new-earth/exoTrain.csv') # training set
df_test = pd.read_csv('../input/new-earth/exoTest.csv') # test set
print(df_train.head()) # show the first few rows
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() -- that would append a stray "None" line.
df_train.info() # summary of the training set

In [47]:
from sklearn.utils import shuffle # 导入乱序工具
# Shuffle the rows of both sets. A fixed seed makes the resulting row order
# reproducible after a kernel restart, so every downstream result that depends
# on row order (e.g. which rows end up in a validation split) is repeatable.
df_train = shuffle(df_train, random_state=42) # shuffle the training set
df_test = shuffle(df_test, random_state=42)  # shuffle the test set

In [48]:
# Split each frame into labels (column 0) and features (all remaining columns).
# The labels are shifted down by one; per the original note this converts them
# to the conventional (0, 1) class coding.
label_col = 0
y_train = df_train.iloc[:, label_col].values - 1 # labels (training)
y_test = df_test.iloc[:, label_col].values - 1 # labels (test)
X_train = df_train.iloc[:, label_col + 1:].values # features (training)
X_test = df_test.iloc[:, label_col + 1:].values # features (test)
# Show the training features followed by the training labels.
print (X_train, y_train, sep='\n')

In [49]:
# Add a trailing axis so every sample becomes a (timesteps, 1) sequence, which
# is the input layout the sequence model is declared with.
# The ndim guard keeps this cell idempotent: an unconditional expand_dims would
# add yet another axis each time the cell is re-run.
if X_train.ndim == 2:
    X_train = np.expand_dims(X_train, axis=2)
if X_test.ndim == 2:
    X_test = np.expand_dims(X_test, axis=2)

In [50]:
from keras.models import Sequential # 导入序贯模型
from keras import layers # 导入所有类型的层
from tensorflow.keras.optimizers import Adam # 导入优化器
from tensorflow.keras.optimizers.schedules import InverseTimeDecay # learning-rate schedule

# Conv1D -> MaxPooling1D -> GRU -> Flatten -> Dropout -> BatchNorm -> sigmoid,
# built as a Sequential model for binary classification.
model = Sequential() # sequential model
model.add(layers.Conv1D(32, kernel_size=10, strides=4,
          input_shape=(3197, 1))) # 1D convolution layer
model.add(layers.MaxPooling1D(pool_size=4, strides=2)) # pooling layer
model.add(layers.GRU(256, return_sequences=True)) # key point: the GRU layer has to be large enough
model.add(layers.Flatten()) # flatten the per-timestep GRU outputs
model.add(layers.Dropout(0.5)) # dropout layer
model.add(layers.BatchNormalization()) # batch normalization
model.add(layers.Dense(1, activation='sigmoid')) # classification output layer
# `lr` and `decay` are legacy Adam arguments that current Keras optimizers no
# longer accept. `learning_rate` is the supported name, and the legacy decay
# rule lr / (1 + decay * step) is exactly an InverseTimeDecay schedule with
# decay_steps=1, so the optimizer behaves as it did before.
lr_schedule = InverseTimeDecay(initial_learning_rate=0.0001,
                               decay_steps=1,
                               decay_rate=0.01)
opt = Adam(learning_rate=lr_schedule, beta_1=0.9, beta_2=0.999) # configure the optimizer
model.compile(optimizer=opt, # optimizer
              loss = 'binary_crossentropy', # cross-entropy loss
              metrics=['accuracy']) # track accuracy

In [51]:
# Training configuration, kept in one place so the values are easy to tune.
fit_options = dict(
    validation_split=0.2, # hold out part of the training data for validation
    batch_size=128, # batch size
    epochs=4, # number of training epochs
    shuffle=True, # shuffle the training data
)
# Train the model; the returned History object is kept in `history`.
history = model.fit(X_train, y_train, **fit_options)

In [52]:
from sklearn.metrics import classification_report # 分类报告
from sklearn.metrics import confusion_matrix # 混淆矩阵
y_prob = model.predict(X_test) # predicted probabilities for the test set
y_pred =  np.where(y_prob > 0.5, 1, 0) # turn probabilities into 0/1 predictions
# scikit-learn metrics take (y_true, y_pred) in that order. Passing them the
# other way round transposes the confusion matrix and swaps precision with
# recall in the report, so the ground truth must come first.
cm = confusion_matrix(y_test, y_pred)
print('Confusion matrix:\n', cm, '\n')
print(classification_report(y_test, y_pred))

In [53]:
# for i in range(len(y_prob)):
#      if y_prob[i] >= 0.5: 
#         y_pred[i] = 1
#      else:
#         y_pred[i] = 0

In [54]:
# Re-threshold the same probabilities at 0.15 instead of 0.5, which marks more
# samples as the positive class.
y_pred =  np.where(y_prob > 0.15, 1, 0) # threshold adjustment
# scikit-learn metrics take (y_true, y_pred) in that order. Passing them the
# other way round transposes the confusion matrix and swaps precision with
# recall in the report, so the ground truth must come first.
cm = confusion_matrix(y_test, y_pred)
print('Confusion matrix:\n', cm, '\n')
print(classification_report(y_test, y_pred))

**下面是两个函数式API的构建代码段，请读者自行研究如何使用函数式API建构更灵活的模型。**

In [56]:
from keras import layers # 导入各种层
from keras.models import Model # 导入模型
from tensorflow.keras.optimizers import Adam # 导入Adam优化器
from tensorflow.keras.optimizers.schedules import InverseTimeDecay # learning-rate schedule

# Same Conv1D -> MaxPooling1D -> GRU -> Flatten -> Dropout -> BatchNorm -> sigmoid
# stack, this time wired together with the functional API.
# The input tensor is named `inputs` so it does not shadow the builtin input().
inputs = layers.Input(shape=(3197, 1)) # Input
x = layers.Conv1D(32, kernel_size=10, strides=4)(inputs)
x = layers.MaxPooling1D(pool_size=4, strides=2)(x)
x = layers.GRU(256, return_sequences=True)(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x)
x = layers.BatchNormalization()(x)
output = layers.Dense(1, activation='sigmoid')(x) # Output
model = Model(inputs, output)
model.summary() # print the model structure
# `lr` and `decay` are legacy Adam arguments that current Keras optimizers no
# longer accept. `learning_rate` is the supported name, and the legacy decay
# rule lr / (1 + decay * step) is exactly an InverseTimeDecay schedule with
# decay_steps=1, so the optimizer behaves as it did before.
lr_schedule = InverseTimeDecay(initial_learning_rate=0.0001,
                               decay_steps=1,
                               decay_rate=0.01)
opt = Adam(learning_rate=lr_schedule, beta_1=0.9, beta_2=0.999) # configure the optimizer
model.compile(optimizer=opt, # optimizer
              loss = 'binary_crossentropy', # cross-entropy loss
              metrics=['accuracy']) # track accuracy

In [57]:
def _gru_branch():
    """Build one Input -> GRU(32) -> Flatten -> Dropout(0.5) branch.

    Returns:
        A (branch_input, branch_output) pair of Keras tensors.
    """
    branch_in = layers.Input(shape=(3197, 1))
    branch_out = layers.GRU(32, return_sequences=True)(branch_in)
    branch_out = layers.Flatten()(branch_out)
    branch_out = layers.Dropout(0.5)(branch_out)
    return branch_in, branch_out

# The original notes call the first branch the "forward" network and the second
# the "reverse" network. Both branches are built identically here, so any
# reversal presumably has to come from the data fed to input_2 -- TODO confirm.
input_1, x = _gru_branch() # forward network
input_2, y = _gru_branch() # reverse network
# Join the two branches and classify with a single sigmoid unit.
z = layers.concatenate([x, y])
output = layers.Dense(1, activation='sigmoid')(z)
model = Model([input_1,input_2], output)
model.summary()