# *Load Google Drive*

In [None]:
# Mount Google Drive at /content/drive; every data and model path used below
# lives under that mount point (Colab-only dependency).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 预操作

In [None]:
# 导入包
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from lightgbm import LGBMRegressor, LGBMClassifier
 
import os
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
 
 
# 数据预处理
def reduce_mem_usage(df):
    """Downcast the columns of ``df`` in place to smaller dtypes and report the saving.

    * Integer columns (signed or unsigned) become the smallest signed integer
      type among int8/int16/int32/int64 whose range contains the column's
      min and max (bounds are inclusive).
    * Float columns become float16 if their range fits, else float32, else
      float64. Only the *range* is checked, so float16/float32 can lose
      precision.
    * Object / string columns become ``category``.
    * Every other dtype (bool, datetime, category, nullable extension types)
      is left untouched.

    Args:
        df: DataFrame to shrink. It is modified in place.

    Returns:
        The same DataFrame object, for call chaining.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object or isinstance(col_type, pd.StringDtype):
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy integer/float columns are range-checked; bool,
        # datetime and extension dtypes must not fall into the float branch.
        if not isinstance(col_type, np.dtype) or col_type.kind not in 'iuf':
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind in 'iu':
            if pd.isna(c_min) or pd.isna(c_max):
                continue  # empty column: nothing to measure
            lowest, highest = int(c_min), int(c_max)
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= lowest and highest <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            # No candidate fits only for unsigned values above int64 max;
            # such a column keeps its dtype.
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Out of float32 range, or min/max is NaN/inf.
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    if start_mem > 0:
        print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df
 
 
# Load the data
data = pd.read_csv('/content/drive/MyDrive/CLF_of_ECG_signals/Data/train.csv')
# NOTE(review): data_features_filtered is loaded here but not referenced in any other visible cell.
data_features_filtered = pd.read_csv('/content/drive/MyDrive/CLF_of_ECG_signals/Features_Extract/train_features_filtered_MinimalFCParameters.csv')
# y_test = pd.read_csv('/content/drive/MyDrive/CLF_of_ECG_signals/Data/testA.csv').values
 
# Raw-feature DataFrame: one row per record laid out as [id, s_0 ... s_k, label]
data_list = []
for item in data.values:
  # item[1] is a comma-separated string; each piece is parsed as a float
  data_list.append([item[0]] + [float(i) for i in item[1].split(',')] + [item[2]])
# Going through a single np.array gives every column (id and label included) one common dtype
data = pd.DataFrame(np.array(data_list))
data.columns = ['id'] + ['s_' + str(i) for i in range(len(data_list[0])-2)] + ['label']
 
data = reduce_mem_usage(data)
 
# Separate the signal columns from the target
from sklearn.model_selection import KFold
features = data.drop(['id','label'], axis=1)
labels = data['label']
 
# Split into X_train, X_val, y_train, y_val
# (no random_state is passed, so the split differs on every run)
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(features, labels)

Memory usage of dataframe is 157.93 MB
Memory usage after optimization is: 39.67 MB
Decreased by 74.9%


# LSTM

In [None]:
# Split into X_train, X_val, y_train, y_val
# (no random_state is passed, so the split differs on every run)
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(features, labels)

# Pre-processing before feeding the model:
# X becomes (samples, columns, 1) and y becomes (samples, 1)
X_train = X_train.values.reshape(X_train.shape[0],X_train.shape[1],1)
X_val = X_val.values.reshape(X_val.shape[0],X_val.shape[1],1)
y_train = y_train.values.reshape(y_train.shape[0],1)
y_val = y_val.values.reshape(y_val.shape[0],1)

print(type(X_train),X_train.shape)
print(type(X_val),X_val.shape)
print(type(y_train),y_train.shape)
print(type(y_val),y_val.shape)


<class 'numpy.ndarray'> (75000, 205, 1)
<class 'numpy.ndarray'> (25000, 205, 1)
<class 'numpy.ndarray'> (75000, 1)
<class 'numpy.ndarray'> (25000, 1)


In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Embedding
from keras.layers import LSTM
from tensorflow.compat.v1.keras.layers import CuDNNLSTM,BatchNormalization,Dropout
from sklearn.preprocessing import OneHotEncoder
import tensorflow.compat.v1 as tf

# Single CuDNNLSTM layer followed by a 4-way softmax classifier.
model = Sequential()
model.add(CuDNNLSTM(128, input_shape=(X_train.shape[1], X_train.shape[2])))  #  return_sequences=True
# model.add(Dropout(0.2))
model.add(BatchNormalization())
# # model.add(CuDNNLSTM(128, return_sequences=True))
# model.add(CuDNNLSTM(64))
# model.add(Dropout(0.2))
# NOTE(review): with the layers above commented out, two BatchNormalization
# layers now sit back to back - confirm this is intended.
model.add(BatchNormalization())
model.add(Dense(4, activation='softmax'))

optimizer = tf.train.AdamOptimizer(learning_rate = 0.01)  # when the loss stops decreasing later on, adjust learning_rate as needed

# Sparse (integer) labels are used, hence sparse_categorical_crossentropy.
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=2, batch_size=100,  validation_split =0.05)
# model.save('/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/LSTM_ECG_0424-256.h5')

def feval_abs_sum(preds, labels):
  """Competition score: sum of |one_hot(label) - one_hot(pred)| over all samples.

  Both arguments are sparse (integer-valued) class labels of equal length;
  any shape is accepted and flattened. Each misclassified sample contributes 2.

  The one-hot encoding uses one shared set of classes (the union of the values
  in ``labels`` and ``preds``). Encoding the two arrays independently yields
  matrices of different widths whenever a class is never predicted, and the
  subtraction then silently broadcasts to a meaningless score.

  Args:
    preds: predicted class ids.
    labels: true class ids.

  Returns:
    The score as a float (0.0 for a perfect prediction).

  Raises:
    ValueError: if the two inputs do not hold the same number of samples.
  """
  # preds = np.argmax(preds.reshape(4,-1),axis=0)
  labels = np.asarray(labels).reshape(-1)
  preds = np.asarray(preds).reshape(-1)
  if labels.shape[0] != preds.shape[0]:
    raise ValueError('preds and labels must have the same number of samples')
  classes = np.union1d(labels, preds)
  labels_ = (labels[:, None] == classes[None, :]).astype(float)
  preds_ = (preds[:, None] == classes[None, :]).astype(float)
  print(labels_.shape, '\n', preds_.shape)
  # np.sum rather than the built-in sum: a later cell rebinds the name `sum`.
  score_for_Competition = float(np.abs(labels_ - preds_).sum())
  return score_for_Competition

# Score the freshly trained model on the held-out validation split.
y_pred = model.predict_classes(X_val)
print(y_pred.shape)
score_for_Competition = feval_abs_sum(y_pred, y_val)
print('score_for_Competition:',score_for_Competition)

# 20 hidden layers, 100 training steps -> val_accuracy: 0.9400
# 20 hidden layers, 200 training steps -> val_accuracy: 0.9651, score_for_Competition:1656


Epoch 1/2
Epoch 2/2




(25000,)
(25000, 4) 
 (25000, 1)
score_for_Competition: 75000.0


In [None]:
# from keras.models import load_model
import keras
# model_path = '/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/LSTM_ECG_0424-128.h5'

# model = load_model(model_path)
# Re-compile the existing `model` with a lower learning rate and keep training it.
optimizer = keras.optimizers.Adam(lr=0.0001)
# optimizer = tf.train.AdamOptimizer(learning_rate = 0.0001)  # adjust learning_rate as needed
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=20, batch_size=100,  validation_split =0.05)
# 200 + 30 + 20

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Save the current `model` to Google Drive; a later cell reloads this exact path as `model_lstm`.
model.save('/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/LSTM_ECG_0428-128.h5')

In [None]:
# history = model.fit(X_train, y_train, epochs=100, batch_size=100,  validation_split =0.05)
# model.save('/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/LSTM_ECG_0424-20.h5')

def feval_abs_sum(preds, labels):
  """Competition score: sum of |one_hot(label) - one_hot(pred)| over all samples.

  Both arguments are sparse (integer-valued) class labels of equal length;
  any shape is accepted and flattened. Each misclassified sample contributes 2.

  Labels and predictions are encoded against the same class set (the union of
  the values seen in either array), so the two one-hot matrices always have
  the same width even when some class is never predicted.

  Args:
    preds: predicted class ids.
    labels: true class ids.

  Returns:
    The score as a float (0.0 for a perfect prediction).

  Raises:
    ValueError: if the two inputs do not hold the same number of samples.
  """
  # preds = np.argmax(preds.reshape(4,-1),axis=0)
  labels = np.asarray(labels).reshape(-1)
  preds = np.asarray(preds).reshape(-1)
  if labels.shape[0] != preds.shape[0]:
    raise ValueError('preds and labels must have the same number of samples')
  classes = np.union1d(labels, preds)
  labels_ = (labels[:, None] == classes[None, :]).astype(float)
  preds_ = (preds[:, None] == classes[None, :]).astype(float)
  print(labels_.shape, '\n', preds_.shape)
  # np.sum rather than the built-in sum: a later cell rebinds the name `sum`.
  score_for_Competition = float(np.abs(labels_ - preds_).sum())
  return score_for_Competition

# Score the fine-tuned model on the held-out validation split.
y_pred = model.predict_classes(X_val)
print(y_pred.shape)
score_for_Competition = feval_abs_sum(y_pred, y_val)
print('score_for_Competition:',score_for_Competition)



(25000,)
(25000, 4) 
 (25000, 4)
score_for_Competition: 218.0


In [None]:
# Load the saved LSTM model from Drive
from keras.models import load_model
from sklearn.model_selection import KFold
model_path = '/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/LSTM_ECG_0428-128.h5'
model_lstm = load_model(model_path)


from sklearn.preprocessing import OneHotEncoder
def feval_abs_sum(preds, labels):  # both arguments are sparse (integer) labels
  """Competition score for sparse labels: 2 points per misclassified sample.

  This equals the sum of absolute differences between the one-hot encodings
  of ``labels`` and ``preds``. The comparison is vectorized; the previous loop
  rebuilt ``np.array(labels)`` on every iteration.

  Args:
    preds: predicted class ids, any shape (flattened).
    labels: true class ids, any shape (flattened).

  Returns:
    The score as an int.

  Raises:
    ValueError: if the two inputs do not hold the same number of samples.
  """
  preds = np.asarray(preds).reshape(-1)
  labels = np.asarray(labels).reshape(-1)
  if preds.shape[0] != labels.shape[0]:
    raise ValueError('preds and labels must have the same number of samples')
  score_for_Competition = 2 * int(np.count_nonzero(preds != labels))
  return score_for_Competition



# 5-fold evaluation of the loaded LSTM
import keras
### Score `model_lstm` on each validation fold of a shuffled 5-fold split.
# NOTE(review): the model is NOT retrained per fold (the fit call below is
# commented out). It was presumably trained on a random split of these same
# rows, so the fold scores are likely optimistic - confirm before reporting.

cv_scores = []
kf = KFold(n_splits=5, shuffle=True, random_state=1)
for i, (train_index, valid_index) in enumerate(kf.split(features, labels)):
  print('********************************* {} *********************************'.format(str(i+1)))
  # KFold yields positional indices, so both frames are indexed with .iloc;
  # plain labels[...] is label-based and only matches for a default RangeIndex.
  X_train, y_train, X_val, y_val = \
  features.iloc[train_index], labels.iloc[train_index], features.iloc[valid_index], labels.iloc[valid_index]

  X_train = X_train.values.reshape(X_train.shape[0],X_train.shape[1],1)
  X_val = X_val.values.reshape(X_val.shape[0],X_val.shape[1],1)
  y_train = y_train.values.reshape(y_train.shape[0],1)
  y_val = y_val.values.reshape(y_val.shape[0],1)

  # optimizer = keras.optimizers.Adam(lr=1e-9, beta_1=0.9, beta_2=0.99, epsilon=1e-08, decay=0.0)
  # model_CNN.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
  # history = model_CNN.fit(X_train, y_train, epochs=5, batch_size=36,  validation_split =0.05)
  
  y_pred = model_lstm.predict_classes(X_val)  
  # print(y_pred)
  # print(y_val)
  score_for_Competition = feval_abs_sum(y_pred, y_val)
  print('score_for_Competition:',score_for_Competition)
  cv_scores.append(score_for_Competition)


# The summary is labelled LSTM: the model evaluated above is model_lstm, not the 1-D CNN.
print(cv_scores)
print('LSTM_score_KFoldtrain_list:{}'.format(cv_scores))
print('LSTM_score_mean:{}'.format(np.mean(cv_scores)))
print('LSTM_score_std:{}'.format(np.std(cv_scores)))


# time_cost: 14分54秒


********************************* 1 *********************************




score_for_Competition: 48
********************************* 2 *********************************
score_for_Competition: 56
********************************* 3 *********************************
score_for_Competition: 32
********************************* 4 *********************************
score_for_Competition: 52
********************************* 5 *********************************
score_for_Competition: 54
[48, 56, 32, 52, 54]
CNN_1D_score_KFoldtrain_list:[48, 56, 32, 52, 54]
CNN_1D_score_mean:48.4
CNN_1D_score_std:8.616263691415208


# CNN + BiLSTM + Attention

参考资料: [使用Keras实现CNN+BiLSTM+Attention的多维(多变量)时间序列预测](https://blog.csdn.net/qq_35649669/article/details/104793484)

In [None]:
from keras.layers import Input, Dense, LSTM, merge ,Conv1D,Dropout,Bidirectional,Multiply
from keras.models import Model

from keras.layers import merge
from keras.layers.core import *
from keras.layers.recurrent import LSTM
from keras.models import *

import  pandas as pd
import  numpy as np

In [None]:
# Split into X_train, X_val, y_train, y_val
# (no random_state is passed, so the split differs on every run)
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(features, labels)


# Pre-processing before feeding the model:
# X becomes (samples, columns, 1) and y becomes (samples, 1)
X_train = X_train.values.reshape(X_train.shape[0],X_train.shape[1],1)
X_val = X_val.values.reshape(X_val.shape[0],X_val.shape[1],1)
y_train = y_train.values.reshape(y_train.shape[0],1)
y_val = y_val.values.reshape(y_val.shape[0],1)

print(type(X_train),X_train.shape)
print(type(X_val),X_val.shape)
print(type(y_train),y_train.shape)
print(type(y_val),y_val.shape)


<class 'numpy.ndarray'> (75000, 205, 1)
<class 'numpy.ndarray'> (25000, 205, 1)
<class 'numpy.ndarray'> (75000, 1)
<class 'numpy.ndarray'> (25000, 1)


In [None]:

SINGLE_ATTENTION_VECTOR = False
def attention_3d_block(inputs):
    """Re-weight a sequence tensor element-wise with learned attention scores.

    A Dense layer with softmax activation (applied over the last axis)
    produces a weight tensor of the same shape as ``inputs``; the result is
    the element-wise product of the two.

    When the module-level flag ``SINGLE_ATTENTION_VECTOR`` is true, the
    weights are first averaged over the time axis and the averaged vector is
    repeated for every time step, so all steps share one weight vector.

    Args:
        inputs: Keras tensor of shape (batch_size, time_steps, input_dim).

    Returns:
        Keras tensor with the same shape as ``inputs``.
    """
    # Imported locally so the block does not depend on an import made in a
    # later cell.
    from keras import backend as K
    from keras.layers import Multiply

    # inputs.shape = (batch_size, time_steps, input_dim)
    input_dim = int(inputs.shape[2])
    a = Dense(input_dim, activation='softmax')(inputs)
    if SINGLE_ATTENTION_VECTOR:
        time_steps = int(inputs.shape[1])
        a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        # The mean has shape (batch, input_dim); repeating it time_steps times
        # restores (batch, time_steps, input_dim) for the product below.
        a = RepeatVector(time_steps)(a)
    # Identity permutation, kept only to give the weights a stable layer name.
    a_probs = Permute((1, 2), name='attention_vec')(a)
    # Keras 2 dropped the functional `merge(..., mode='mul')` helper
    # (`keras.layers.merge` is a module there); Multiply is the replacement.
    output_attention_mul = Multiply(name='attention_mul')([inputs, a_probs])
    return output_attention_mul

def attention_model(TIME_STEPS, INPUT_DIMS, lstm_units=None, num_classes=1):
    """Build the Conv1D -> BiLSTM -> attention -> Dense model.

    Args:
        TIME_STEPS: length of each input sequence.
        INPUT_DIMS: number of channels per time step.
        lstm_units: units per LSTM direction. ``None`` (default) keeps the
            original behaviour of reading the notebook-level ``lstm_units``
            variable, falling back to 64 when it is not defined yet.
        num_classes: size of the output layer. ``1`` (default) keeps the
            original single sigmoid unit; a larger value produces a softmax
            over that many classes.

    Returns:
        An uncompiled ``keras.models.Model``.
    """
    if lstm_units is None:
        # The notebook defines `lstm_units` in a later cell than this
        # function, so look it up at call time.
        lstm_units = globals().get('lstm_units', 64)

    inputs = Input(shape=(TIME_STEPS, INPUT_DIMS))

    x = Conv1D(filters = 64, kernel_size = 1, activation = 'relu')(inputs)  #, padding = 'same'
    x = Dropout(0.3)(x)

    #lstm_out = Bidirectional(LSTM(lstm_units, activation='relu'), name='bilstm')(x)
    lstm_out = Bidirectional(LSTM(lstm_units, return_sequences=True))(x)
    attention_mul = attention_3d_block(lstm_out)
    attention_mul = Flatten()(attention_mul)

    if num_classes == 1:
        output = Dense(1, activation='sigmoid')(attention_mul)
    else:
        output = Dense(num_classes, activation='softmax')(attention_mul)
    model = Model(inputs=[inputs], outputs=output)
    return model


In [None]:
# Hyper-parameters for the attention model. `lstm_units` is read as a global
# inside attention_model, so it must be defined before the call below.
INPUT_DIMS = 1
TIME_STEPS = 205
lstm_units = 64

# NOTE(review): the call passes literals instead of TIME_STEPS / INPUT_DIMS.
model = attention_model(TIME_STEPS=205, INPUT_DIMS=1)
model.summary()
# NOTE(review): attention_model ends in Dense(1, sigmoid) and is trained with
# MSE here, whereas the other models in this notebook use a 4-way softmax with
# sparse_categorical_crossentropy on the same labels - confirm this is intended.
model.compile(optimizer='adam', loss='mse')
model.fit([X_train], y_train, epochs=10, batch_size=64, validation_split=0.1)
# model.save('/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/LSTM_ECG_0424-256.h5')

TypeError: ignored

# CNN_1D

In [None]:
# Split into X_train, X_val, y_train, y_val (fixed seed, 80/20 split)
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(features, labels, random_state=507, test_size=0.2)
 
# Pre-processing before feeding the model:
# X becomes (samples, columns, 1) and y becomes (samples, 1)
X_train = X_train.values.reshape(X_train.shape[0],X_train.shape[1],1)
X_val = X_val.values.reshape(X_val.shape[0],X_val.shape[1],1)
y_train = y_train.values.reshape(y_train.shape[0],1)
y_val = y_val.values.reshape(y_val.shape[0],1)
 
print(type(X_train),X_train.shape)
print(type(X_val),X_val.shape)
print(type(y_train),y_train.shape)
print(type(y_val),y_val.shape)

<class 'numpy.ndarray'> (80000, 205, 1)
<class 'numpy.ndarray'> (20000, 205, 1)
<class 'numpy.ndarray'> (80000, 1)
<class 'numpy.ndarray'> (20000, 1)


In [None]:
import keras
from scipy.io import loadmat
import matplotlib.pyplot as plt 
import glob
import numpy as np
import pandas as pd
import math
import os
import keras
from keras.layers import *
from keras.models import *
from keras.objectives import *
from sklearn.preprocessing import OneHotEncoder
 
 
"""GPU设置为按需增长"""
import os
import tensorflow.compat.v1 as tf
# from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1.keras.backend import set_session
 
# Configure a TF1-style session so GPU memory is allocated on demand.
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # with several GPUs, selects which GPU index is used
config = tf.ConfigProto()
config.allow_soft_placement=True # allow dynamic placement of tensors and ops
config.gpu_options.per_process_gpu_memory_fraction = 0.4 # use at most 40% of GPU memory
config.gpu_options.allow_growth=True   # do not grab all GPU memory at start-up; allocate on demand 
sess = tf.Session(config = config)
set_session(sess)

In [None]:
#%tensorflow_version 1.x   # 使用1.x版本的TF
 
# 用CNN做序列特征提取
import keras
from scipy.io import loadmat
import matplotlib.pyplot as plt 
import glob
import numpy as np
import pandas as pd
import math
import os
import keras
from keras.layers import *
from keras.models import *
from keras.objectives import *
from sklearn.preprocessing import OneHotEncoder
 
 
"""GPU设置为按需增长"""
import os
import tensorflow.compat.v1 as tf
# from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1.keras.backend import set_session
 
# Configure a TF1-style session so GPU memory is allocated on demand.
# NOTE(review): this repeats the configuration done in the previous cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0" # with several GPUs, selects which GPU index is used
config = tf.ConfigProto()
config.allow_soft_placement=True # allow dynamic placement of tensors and ops
config.gpu_options.per_process_gpu_memory_fraction = 0.4 # use at most 40% of GPU memory
config.gpu_options.allow_growth=True   # do not grab all GPU memory at start-up; allocate on demand 
sess = tf.Session(config = config)
set_session(sess)
 
from keras import backend as K
 
 
TIME_PERIODS = 205
num_sensors = 1
def build_model(input_shape=(TIME_PERIODS,num_sensors),num_classes=4):
    """Build the 1-D CNN classifier (uncompiled).

    The network is three stages of [Conv1D + BatchNormalization] x 3, each
    followed by MaxPooling1D(2), then global average pooling, dropout and a
    softmax layer.

    Args:
        input_shape: (time_steps, channels) of one sample.
        num_classes: width of the final softmax layer.

    Returns:
        A ``Sequential`` model.
    """
    # (filters, kernel_size, padding of the FIRST convolution of the stage).
    # The remaining two convolutions of every stage use padding='same'.
    stages = (
        (64, 8, 'valid'),
        (128, 4, 'valid'),
        (256, 3, 'same'),
    )

    model = Sequential()
    # model.add(Reshape((TIME_PERIODS, num_sensors), input_shape=input_shape))
    for stage_index, (filters, kernel_size, first_padding) in enumerate(stages):
        for conv_index in range(3):
            conv_kwargs = dict(
                strides=1,
                activation='relu',
                padding=first_padding if conv_index == 0 else 'same',
            )
            # Only the very first layer of a Sequential model takes
            # input_shape; it used to be passed again to a later layer.
            if stage_index == 0 and conv_index == 0:
                conv_kwargs['input_shape'] = input_shape
            model.add(Conv1D(filters, kernel_size, **conv_kwargs))
            model.add(BatchNormalization())
        model.add(MaxPooling1D(2))

    model.add(GlobalAveragePooling1D())
    model.add(Dropout(0.3))
    model.add(Dense(num_classes, activation='softmax'))
    return model
 
# Reset the Keras backend state before building a fresh model.
K.clear_session()
 
model_CNN = build_model(input_shape=(TIME_PERIODS,num_sensors),num_classes=4)
# Early stopping watches the training loss ('loss'), not a validation metric.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=50)
# lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=1e-4, decay_steps=50, decay_rate=0.9)
# optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
model_CNN.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model_CNN.fit(X_train, y_train, epochs=100, batch_size=100, \
             validation_split =0.05, callbacks= [callback] )
# model_CNN.save('/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/CNN_ECG_0501.h5')

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
import tensorflow as tf
# Fine-tune the existing CNN at a small, exponentially decaying learning rate.
# NOTE(review): patience=30 exceeds epochs=5, so this callback cannot trigger.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=30)
# ExponentialDecay computes initial_learning_rate * decay_rate ** (step / decay_steps).
# decay_rate=0.0 made the learning rate exactly 0 after the first step, so no
# further training took place. 0.9 matches the schedule sketched (commented
# out) in the training cell above.
lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=1e-5, decay_steps=25, decay_rate=0.9)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
model_CNN.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
history = model_CNN.fit(X_train, y_train, epochs=5, batch_size=50, \
             validation_split =0.1, callbacks= [callback] )
model_CNN.save('/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/CNN_ECG_0517.h5')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
from keras.models import load_model
import keras
# path_model = '/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/CNN_ECG_0509.h5'
# model_CNN = load_model(path_model)
# Re-compile the in-memory model_CNN with a fixed small learning rate and train 20 more epochs.
optimizer = keras.optimizers.Adam(lr=1e-5, beta_1=0.9, beta_2=0.99, epsilon=1e-08, decay=0.0)
model_CNN.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
history = model_CNN.fit(X_train, y_train, epochs=20, batch_size=100,  validation_split =0.05)
# 500 + 
# model_CNN.save('/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/CNN_ECG_0430.h5')

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
from sklearn.preprocessing import OneHotEncoder
from keras.models import load_model
def feval_abs_sum(preds, labels):  # both arguments are sparse (integer) labels
  """Competition score for sparse labels: 2 points per misclassified sample.

  The comparison is vectorized; the previous per-sample loop rebuilt
  ``np.array(labels)`` on every iteration.

  Args:
    preds: predicted class ids, any shape (flattened).
    labels: true class ids, any shape (flattened).

  Returns:
    The score as an int.

  Raises:
    ValueError: if the two inputs do not hold the same number of samples.
  """
  preds = np.asarray(preds).reshape(-1)
  labels = np.asarray(labels).reshape(-1)
  if preds.shape[0] != labels.shape[0]:
    raise ValueError('preds and labels must have the same number of samples')
  score_for_Competition = 2 * int(np.count_nonzero(preds != labels))
  return score_for_Competition
 
# Reload the saved CNN and score it on the current validation split.
model_CNN = load_model('/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/CNN_ECG_0517.h5')
y_pred = model_CNN.predict_classes(X_val)  # predict_proba would give per-class probabilities instead (no one-hot conversion needed)
# print(y_pred)
# print(y_val)
score_for_Competition = feval_abs_sum(y_pred, y_val)
# NOTE(review): the 0.8 factor is unexplained here; presumably it rescales the
# validation score to the leaderboard set size - confirm.
print('预计上榜得分:',score_for_Competition*0.8)
 
 
# With this CNN_1D architecture, 30 steps gave score_for_Competition 904, clearly worse than lightGBM's 531.2; recorded 2021/04/22 01:03, model: CNN_ECG_0422.h5
# With this CNN_1D architecture, 30 steps gave score_for_Competition 634, worse than lightGBM's 531.2; recorded 2021/04/22 03:47, model: CNN_ECG_0422_1.h5
# With this CNN_1D architecture, 200 steps gave score_for_Competition 576, worse than lightGBM's 531.2; recorded 2021/04/27 23:47, model: CNN_ECG_0427.h5



In [None]:
# 加载模型
from keras.models import load_model
from sklearn.model_selection import KFold
# model_path = '/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/CNN_ECG_0430.h5'
# model_CNN = load_model(model_path)


from sklearn.preprocessing import OneHotEncoder
def feval_abs_sum(preds, labels):  # both arguments are sparse (integer) labels
  """Competition score for sparse labels: 2 points per misclassified sample.

  The comparison is vectorized; the previous per-sample loop rebuilt
  ``np.array(labels)`` on every iteration, which dominated the run time of
  the K-fold evaluation.

  Args:
    preds: predicted class ids, any shape (flattened).
    labels: true class ids, any shape (flattened).

  Returns:
    The score as an int.

  Raises:
    ValueError: if the two inputs do not hold the same number of samples.
  """
  preds = np.asarray(preds).reshape(-1)
  labels = np.asarray(labels).reshape(-1)
  if preds.shape[0] != labels.shape[0]:
    raise ValueError('preds and labels must have the same number of samples')
  score_for_Competition = 2 * int(np.count_nonzero(preds != labels))
  return score_for_Competition


# 5-fold validation
import keras
### Score an already-trained model on each validation fold of a shuffled K-fold split.
def KFold_Validation(features, labels, model=None, n_splits=5, random_state=1):
  """Print the competition score of a trained model on every validation fold.

  The model is only evaluated, never retrained, so this measures how stable
  the score is across subsets of the data rather than true cross-validated
  generalisation.

  Args:
    features: DataFrame of shape (samples, time_steps).
    labels: sparse class labels, one per row of ``features``.
    model: object exposing ``predict_classes``. ``None`` (default) uses the
      notebook-level ``model_CNN``, as the original implementation did.
    n_splits: number of folds.
    random_state: seed for the shuffled split.

  Returns:
    None. The per-fold scores and their mean / std are printed.
  """
  if model is None:
    model = model_CNN

  # KFold yields positional indices, so labels are indexed positionally too;
  # plain labels[idx] on a Series is label-based and only matches for a
  # default RangeIndex.
  label_values = np.asarray(labels)

  cv_scores = []
  kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
  for i, (_, valid_index) in enumerate(kf.split(features, labels)):
    print('********************************* {} *********************************'.format(str(i+1)))
    # Only the validation fold is needed; the training fold was never used.
    X_val_ = features.iloc[valid_index].values
    X_val_ = X_val_.reshape(X_val_.shape[0], X_val_.shape[1], 1)
    y_val_ = label_values[valid_index].reshape(-1, 1)

    y_pred_ = model.predict_classes(X_val_)
    score_for_Competition = feval_abs_sum(y_pred_, y_val_)
    print('score_for_Competition:',score_for_Competition)
    cv_scores.append(score_for_Competition)


  print(cv_scores)
  print('CNN_1D_score_KFoldtrain_list:{}'.format(cv_scores))
  print('CNN_1D_score_mean:{}'.format(np.mean(cv_scores)))
  print('CNN_1D_score_std:{}'.format(np.std(cv_scores)))

  return None

# Evaluate the in-memory model_CNN on all five folds of the full training data.
KFold_Validation(features, labels)



# time_cost: 14 min 54 s


********************************* 1 *********************************




score_for_Competition: 84
********************************* 2 *********************************
score_for_Competition: 94
********************************* 3 *********************************
score_for_Competition: 80
********************************* 4 *********************************
score_for_Competition: 70
********************************* 5 *********************************
score_for_Competition: 66
[84, 94, 80, 70, 66]
CNN_1D_score_KFoldtrain_list:[84, 94, 80, 70, 66]
CNN_1D_score_mean:78.8
CNN_1D_score_std:10.007996802557443


In [None]:
# Save the current model_CNN to Google Drive.
model_CNN.save('/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/Model/CNN_ECG_0429_1.h5')

In [None]:
# Produce predictions for submission
X_test = pd.read_csv('/content/drive/MyDrive/CLF_of_ECG_signals/Data/testA.csv')

# Parse X_test: each row becomes [id, s_0 ... s_k] (no label column)
X_test_list = []
for item in X_test.values:
  # item[1] is a comma-separated string; each piece is parsed as a float
  X_test_list.append([item[0]] + [float(i) for i in item[1].split(',')])
X_test = pd.DataFrame(np.array(X_test_list))
# The column count is taken from the training rows (data_list), which carry one extra label entry
X_test.columns = ['id'] + ['s_' + str(i) for i in range(len(data_list[0])-2)]
X_test = X_test.drop(['id'], axis=1)
X_test = reduce_mem_usage(X_test)


# Split into X_train, X_val, y_train, y_val
# NOTE(review): this is a fresh, unseeded split; X_val probably overlaps the
# rows model_CNN was trained on, so the validation numbers below are likely
# optimistic - confirm.
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(features, labels)

# Pre-processing before feeding the model:
# X becomes (samples, columns, 1) and y becomes (samples, 1)
X_train = X_train.values.reshape(X_train.shape[0],X_train.shape[1],1)
X_val = X_val.values.reshape(X_val.shape[0],X_val.shape[1],1)
y_train = y_train.values.reshape(y_train.shape[0],1)
y_val = y_val.values.reshape(y_val.shape[0],1)
X_test = X_test.values.reshape(X_test.shape[0],X_test.shape[1],1)

print(type(X_train),X_train.shape)
print(type(X_val),X_val.shape)
print(type(y_train),y_train.shape)
print(type(y_val),y_val.shape)
print(type(X_test),X_test.shape)


# Predict on the validation split
val_pred = model_CNN.predict_classes(X_val.reshape(X_val.shape[0], X_val.shape[1],1))
# val_pred_Stacking = np.argmax(val_pred_Stacking,axis=1)
score_for_Competition = feval_abs_sum(val_pred, y_val)  # take care to distinguish y_true from y_val
print('score_for_Competition in Train_Dataset:',score_for_Competition)
# Count the correct predictions in one vectorized comparison. The counter is
# deliberately not called `sum`: rebinding that name at notebook level shadows
# the built-in and breaks every cell that calls sum(...) afterwards.
n_correct = int(np.count_nonzero(np.asarray(val_pred).reshape(-1) == np.asarray(y_val).reshape(-1)))

print('验证集预测准确率：', float(n_correct/y_val.shape[0]))


# Predict on the test set
test_pred = model_CNN.predict_classes(X_test.reshape(X_test.shape[0], X_test.shape[1],1))
# test_pred_Stacking = np.argmax(test_pred_Stacking,axis=1)
print(test_pred)

Memory usage of dataframe is 31.28 MB
Memory usage after optimization is: 7.82 MB
Decreased by 75.0%
<class 'numpy.ndarray'> (75000, 205, 1)
<class 'numpy.ndarray'> (25000, 205, 1)
<class 'numpy.ndarray'> (75000, 1)
<class 'numpy.ndarray'> (25000, 1)
<class 'numpy.ndarray'> (20000, 205, 1)




score_for_Competition in Train_Dataset: 94
验证集预测准确率： 0.99812
[0 2 3 ... 2 0 0]


In [None]:
# One-hot encode the predicted class ids against the fixed set of 4 classes
# (the submission file has columns label_0 .. label_3). Fitting an encoder on
# the predictions themselves would produce fewer, shifted columns whenever
# some class is never predicted.
n_classes = 4
y_preds_subm = np.asarray(test_pred).astype(int).reshape(-1)
y_preds_subm = np.eye(n_classes)[y_preds_subm]
print(y_preds_subm)

result=pd.read_csv('/content/drive/MyDrive/CLF_of_ECG_signals/Data/sample_submit.csv')
for class_id in range(n_classes):
  result['label_{}'.format(class_id)] = y_preds_subm[:, class_id]
print(result)
result.to_csv('/content/drive/MyDrive/CLF_of_ECG_signals/Result_Submission/submit.csv',index=False)


[[1. 0. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 ...
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]]
           id  label_0  label_1  label_2  label_3
0      100000      1.0      0.0      0.0      0.0
1      100001      0.0      0.0      1.0      0.0
2      100002      0.0      0.0      0.0      1.0
3      100003      1.0      0.0      0.0      0.0
4      100004      1.0      0.0      0.0      0.0
...       ...      ...      ...      ...      ...
19995  119995      1.0      0.0      0.0      0.0
19996  119996      1.0      0.0      0.0      0.0
19997  119997      1.0      0.0      0.0      0.0
19998  119998      1.0      0.0      0.0      0.0
19999  119999      1.0      0.0      0.0      0.0

[20000 rows x 5 columns]


# Reference

In [None]:
class ConvMode(Model):
    """Subclassed 1-D convolutional classifier with a 4-way softmax output.

    Six Conv1D -> BatchNormalization -> LeakyReLU(0.2) blocks are applied in
    order. Blocks 1-5 are each followed by dropout; blocks 3 and 6 use a
    stride equal to their kernel size, which shortens the sequence. The
    result is flattened and passed to the dense softmax layer.
    """

    def __init__(self,dp=0.3):
        """Create all layers; ``dp`` is the rate shared by the five dropout layers."""
        super(ConvMode, self).__init__()

        def conv_block(filters, kernel_size, strides, padding):
            # One convolution unit: Conv1D + BatchNormalization + LeakyReLU(0.2).
            return Sequential([
                layers.Conv1D(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding),
                layers.BatchNormalization(),
                layers.LeakyReLU(0.2)
            ])

        # Feature extraction at full sequence length
        self.conv1 = conv_block(32, 15, 1, 'same')
        self.dropout_c1 = layers.Dropout(dp)
        self.conv2 = conv_block(32, 15, 1, 'same')
        self.dropout_c2 = layers.Dropout(dp)
        # Feature extraction + down-sampling (stride 5)
        self.conv3 = conv_block(128, 5, 5, 'valid')
        self.dropout_c3 = layers.Dropout(dp)
        # Feature extraction at the reduced length
        self.conv4 = conv_block(128, 7, 1, 'same')
        self.dropout_c4 = layers.Dropout(dp)
        self.conv5 = conv_block(128, 7, 1, 'same')
        self.dropout_c5 = layers.Dropout(dp)
        # Feature extraction + down-sampling (stride 6); no dropout after it
        self.conv6 = conv_block(128, 6, 6, 'valid')
        self.flatten = layers.Flatten()
        # Final prediction head
        self.fc = Sequential([
            layers.Dense(4),
            layers.Activation('softmax')
        ])

    def call(self, inputs, training=None, mask=None):
        """Run the forward pass; ``training`` is forwarded to the dropout layers only."""
        conv_dropout_pairs = (
            (self.conv1, self.dropout_c1),
            (self.conv2, self.dropout_c2),
            (self.conv3, self.dropout_c3),
            (self.conv4, self.dropout_c4),
            (self.conv5, self.dropout_c5),
        )
        x = inputs
        for conv, dropout in conv_dropout_pairs:
            x = conv(x)
            x = dropout(x, training=training)
        x = self.conv6(x)
        x = self.flatten(x)
        return self.fc(x)